mirror of https://github.com/torvalds/linux.git
We have been observing hangs, both of KVM guest vcpu tasks and more generally, where a process that is woken doesn't properly wake up and continue to run, but instead sticks in TASK_WAKING state. This happens because the update of rq->wake_list in ttwu_queue_remote() is not ordered with the update of ipi_message in smp_muxed_ipi_message_pass(), and the reading of rq->wake_list in scheduler_ipi() is not ordered with the reading of ipi_message in smp_ipi_demux(). Thus it is possible for the IPI receiver not to see the updated rq->wake_list and therefore conclude that there is nothing for it to do. In order to make sure that anything done before smp_send_reschedule() is ordered before anything done in the resulting call to scheduler_ipi(), this adds barriers in smp_muxed_ipi_message_pass() and smp_ipi_demux(). The barrier in smp_muxed_ipi_message_pass() is a full barrier to ensure that there is a full ordering between the smp_send_reschedule() caller and scheduler_ipi(). In smp_ipi_demux(), we use xchg() rather than xchg_local() because xchg() includes release and acquire barriers. Using xchg() rather than xchg_local() makes sense given that ipi_message is not just accessed locally. This moves the barrier between setting the message and calling the cause_ipi() function into the individual cause_ipi implementations. Most of them -- those that used outb, out_8 or similar -- already had a full barrier because out_8 etc. include a sync before the MMIO store. This adds an explicit barrier in the two remaining cases. These changes made no measurable difference to the speed of IPIs as measured using a simple ping-pong latency test across two CPUs on different cores of a POWER7 machine. The analysis of the reason why processes were not waking up properly is due to Milton Miller. Cc: stable@vger.kernel.org # v3.0+ Reported-by: Milton Miller <miltonm@bga.com> Signed-off-by: Paul Mackerras <paulus@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> |
||
|---|---|---|
| .. | ||
| vdso32 | ||
| vdso64 | ||
| .gitignore | ||
| Makefile | ||
| align.c | ||
| asm-offsets.c | ||
| audit.c | ||
| btext.c | ||
| cacheinfo.c | ||
| cacheinfo.h | ||
| clock.c | ||
| compat_audit.c | ||
| cpu_setup_6xx.S | ||
| cpu_setup_44x.S | ||
| cpu_setup_a2.S | ||
| cpu_setup_fsl_booke.S | ||
| cpu_setup_pa6t.S | ||
| cpu_setup_power7.S | ||
| cpu_setup_ppc970.S | ||
| cputable.c | ||
| crash.c | ||
| crash_dump.c | ||
| dbell.c | ||
| dma-iommu.c | ||
| dma-swiotlb.c | ||
| dma.c | ||
| entry_32.S | ||
| entry_64.S | ||
| epapr_hcalls.S | ||
| epapr_paravirt.c | ||
| exceptions-64e.S | ||
| exceptions-64s.S | ||
| fadump.c | ||
| firmware.c | ||
| fpu.S | ||
| fsl_booke_entry_mapping.S | ||
| ftrace.c | ||
| head_8xx.S | ||
| head_32.S | ||
| head_40x.S | ||
| head_44x.S | ||
| head_64.S | ||
| head_booke.h | ||
| head_fsl_booke.S | ||
| hw_breakpoint.c | ||
| ibmebus.c | ||
| idle.c | ||
| idle_6xx.S | ||
| idle_book3e.S | ||
| idle_e500.S | ||
| idle_power4.S | ||
| idle_power7.S | ||
| io-workarounds.c | ||
| io.c | ||
| iomap.c | ||
| iommu.c | ||
| irq.c | ||
| isa-bridge.c | ||
| jump_label.c | ||
| kgdb.c | ||
| kprobes.c | ||
| kvm.c | ||
| kvm_emul.S | ||
| l2cr_6xx.S | ||
| legacy_serial.c | ||
| lparcfg.c | ||
| machine_kexec.c | ||
| machine_kexec_32.c | ||
| machine_kexec_64.c | ||
| misc.S | ||
| misc_32.S | ||
| misc_64.S | ||
| module.c | ||
| module_32.c | ||
| module_64.c | ||
| msi.c | ||
| nvram_64.c | ||
| of_platform.c | ||
| paca.c | ||
| pci-common.c | ||
| pci_32.c | ||
| pci_64.c | ||
| pci_dn.c | ||
| pci_of_scan.c | ||
| pmc.c | ||
| ppc32.h | ||
| ppc_ksyms.c | ||
| ppc_save_regs.S | ||
| proc_powerpc.c | ||
| process.c | ||
| prom.c | ||
| prom_init.c | ||
| prom_init_check.sh | ||
| prom_parse.c | ||
| ptrace.c | ||
| ptrace32.c | ||
| reloc_32.S | ||
| reloc_64.S | ||
| rtas-proc.c | ||
| rtas-rtc.c | ||
| rtas.c | ||
| rtas_flash.c | ||
| rtas_pci.c | ||
| rtasd.c | ||
| setup-common.c | ||
| setup.h | ||
| setup_32.c | ||
| setup_64.c | ||
| signal.c | ||
| signal.h | ||
| signal_32.c | ||
| signal_64.c | ||
| smp-tbsync.c | ||
| smp.c | ||
| softemu8xx.c | ||
| stacktrace.c | ||
| suspend.c | ||
| swsusp.c | ||
| swsusp_32.S | ||
| swsusp_64.c | ||
| swsusp_asm64.S | ||
| swsusp_booke.S | ||
| sys_ppc32.c | ||
| syscalls.c | ||
| sysfs.c | ||
| systbl.S | ||
| systbl_chk.c | ||
| systbl_chk.sh | ||
| tau_6xx.c | ||
| time.c | ||
| traps.c | ||
| udbg.c | ||
| udbg_16550.c | ||
| vdso.c | ||
| vecemu.c | ||
| vector.S | ||
| vio.c | ||
| vmlinux.lds.S | ||