Merge tag 'v4.16-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux...
author Boris Brezillon <boris.brezillon@bootlin.com>
Wed, 4 Apr 2018 20:13:35 +0000 (22:13 +0200)
committer Boris Brezillon <boris.brezillon@bootlin.com>
Wed, 4 Apr 2018 20:13:35 +0000 (22:13 +0200)
Backmerge v4.16-rc2 into mtd/next to resolve a conflict between Linus'
master branch and nand/for-4.17.

281 files changed:
Documentation/ABI/testing/sysfs-devices-platform-dock [new file with mode: 0644]
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/ABI/testing/sysfs-platform-dptf [new file with mode: 0644]
Documentation/atomic_bitops.txt
Documentation/devicetree/bindings/power/mti,mips-cpc.txt [new file with mode: 0644]
Documentation/features/sched/membarrier-sync-core/arch-support.txt [new file with mode: 0644]
Documentation/locking/mutex-design.txt
MAINTAINERS
Makefile
arch/arm64/include/asm/cputype.h
arch/arm64/include/asm/hugetlb.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/pgalloc.h
arch/arm64/include/asm/pgtable.h
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/efi.c
arch/arm64/kernel/hibernate.c
arch/arm64/kvm/hyp/switch.c
arch/arm64/mm/dump.c
arch/arm64/mm/fault.c
arch/arm64/mm/hugetlbpage.c
arch/arm64/mm/kasan_init.c
arch/arm64/mm/mmu.c
arch/arm64/mm/pageattr.c
arch/arm64/mm/proc.S
arch/ia64/kernel/Makefile
arch/mips/kernel/mips-cpc.c
arch/mips/kernel/setup.c
arch/mips/kernel/smp-bmips.c
arch/powerpc/include/asm/book3s/32/pgtable.h
arch/powerpc/include/asm/book3s/64/hash-4k.h
arch/powerpc/include/asm/book3s/64/hash-64k.h
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/book3s/64/pgalloc.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/exception-64s.h
arch/powerpc/include/asm/hw_irq.h
arch/powerpc/include/asm/kexec.h
arch/powerpc/include/asm/nohash/32/pgtable.h
arch/powerpc/include/asm/nohash/64/pgtable.h
arch/powerpc/include/asm/topology.h
arch/powerpc/kernel/exceptions-64e.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/sysfs.c
arch/powerpc/mm/drmem.c
arch/powerpc/mm/hash64_4k.c
arch/powerpc/mm/hash64_64k.c
arch/powerpc/mm/hash_utils_64.c
arch/powerpc/mm/hugetlbpage-hash64.c
arch/powerpc/mm/init-common.c
arch/powerpc/mm/numa.c
arch/powerpc/mm/pgtable-radix.c
arch/powerpc/mm/pgtable_64.c
arch/powerpc/mm/tlb_hash64.c
arch/powerpc/platforms/powernv/opal-imc.c
arch/powerpc/platforms/powernv/vas-window.c
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/powerpc/platforms/pseries/ras.c
arch/powerpc/sysdev/xive/spapr.c
arch/sparc/Kconfig
arch/x86/.gitignore
arch/x86/Kconfig
arch/x86/Kconfig.cpu
arch/x86/crypto/sha512-mb/sha512_mb_mgr_init_avx2.c
arch/x86/entry/calling.h
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/events/intel/core.c
arch/x86/events/intel/lbr.c
arch/x86/events/intel/p6.c
arch/x86/include/asm/acpi.h
arch/x86/include/asm/barrier.h
arch/x86/include/asm/bug.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/page_64.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/pgtable_32.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/smp.h
arch/x86/include/asm/tlbflush.h
arch/x86/kernel/amd_nb.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/centaur.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cyrix.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/intel_rdt.c
arch/x86/kernel/cpu/mcheck/mce-internal.h
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/cpu/proc.c
arch/x86/kernel/head_32.S
arch/x86/kernel/mpparse.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/traps.c
arch/x86/kvm/mmu.c
arch/x86/kvm/vmx.c
arch/x86/lib/cpu.c
arch/x86/lib/error-inject.c
arch/x86/mm/init_64.c
arch/x86/mm/ioremap.c
arch/x86/mm/kmmio.c
arch/x86/mm/pgtable_32.c
arch/x86/mm/tlb.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/smp.c
block/blk-mq.c
crypto/sha3_generic.c
drivers/acpi/bus.c
drivers/acpi/ec.c
drivers/acpi/property.c
drivers/acpi/spcr.c
drivers/base/core.c
drivers/base/power/wakeirq.c
drivers/base/property.c
drivers/char/hw_random/via-rng.c
drivers/cpufreq/acpi-cpufreq.c
drivers/cpufreq/longhaul.c
drivers/cpufreq/p4-clockmod.c
drivers/cpufreq/powernow-k7.c
drivers/cpufreq/speedstep-centrino.c
drivers/cpufreq/speedstep-lib.c
drivers/crypto/caam/ctrl.c
drivers/crypto/padlock-aes.c
drivers/crypto/sunxi-ss/sun4i-ss-prng.c
drivers/crypto/talitos.c
drivers/edac/amd64_edac.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/gpu/drm/i915/gvt/mmio_context.c
drivers/gpu/drm/i915/gvt/trace.h
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem_context.c
drivers/gpu/drm/i915/i915_oa_cflgt3.c
drivers/gpu/drm/i915/i915_oa_cnl.c
drivers/gpu/drm/i915/i915_pmu.c
drivers/gpu/drm/i915/i915_pmu.h
drivers/gpu/drm/i915/intel_bios.c
drivers/gpu/drm/i915/intel_breadcrumbs.c
drivers/gpu/drm/i915/intel_cdclk.c
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
drivers/hwmon/coretemp.c
drivers/hwmon/hwmon-vid.c
drivers/hwmon/k10temp.c
drivers/hwmon/k8temp.c
drivers/irqchip/irq-bcm7038-l1.c
drivers/irqchip/irq-bcm7120-l2.c
drivers/irqchip/irq-brcmstb-l2.c
drivers/irqchip/irq-gic-v2m.c
drivers/irqchip/irq-gic-v3-its-pci-msi.c
drivers/irqchip/irq-gic-v3-its-platform-msi.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-mips-gic.c
drivers/macintosh/macio_asic.c
drivers/md/dm.c
drivers/misc/ocxl/file.c
drivers/mmc/host/bcm2835.c
drivers/mmc/host/meson-gx-mmc.c
drivers/mtd/nand/raw/Kconfig
drivers/mtd/nand/raw/vf610_nfc.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/io-cmd.c
drivers/of/property.c
drivers/opp/cpu.c
drivers/platform/x86/dell-laptop.c
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/wmi.c
drivers/s390/virtio/virtio_ccw.c
drivers/staging/fsl-mc/bus/irq-gic-v3-its-fsl-mc-msi.c
drivers/usb/Kconfig
drivers/usb/host/Kconfig
drivers/video/fbdev/geode/video_gx.c
drivers/xen/pvcalls-front.c
drivers/xen/xenbus/xenbus.h
drivers/xen/xenbus/xenbus_comms.c
drivers/xen/xenbus/xenbus_xs.c
fs/btrfs/backref.c
fs/btrfs/delayed-ref.c
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/qgroup.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/gfs2/bmap.c
fs/proc/kcore.c
include/asm-generic/bitops/lock.h
include/linux/acpi.h
include/linux/blkdev.h
include/linux/compiler-gcc.h
include/linux/compiler.h
include/linux/cpuidle.h
include/linux/cpumask.h
include/linux/dma-mapping.h
include/linux/fwnode.h
include/linux/kcore.h
include/linux/mm_inline.h
include/linux/nospec.h
include/linux/property.h
include/linux/semaphore.h
include/sound/ac97/regs.h
include/trace/events/xen.h
kernel/irq/irqdomain.c
kernel/kprobes.c
kernel/locking/qspinlock.c
kernel/sched/core.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/deadline.c
kernel/sched/rt.c
lib/dma-direct.c
mm/memory-failure.c
mm/memory.c
net/9p/trans_virtio.c
sound/ac97/Kconfig
sound/core/seq/seq_clientmgr.c
sound/pci/hda/patch_realtek.c
sound/usb/mixer.c
sound/usb/pcm.c
sound/usb/quirks.c
tools/arch/powerpc/include/uapi/asm/kvm.h
tools/arch/s390/include/uapi/asm/unistd.h [deleted file]
tools/arch/x86/include/asm/cpufeatures.h
tools/include/uapi/drm/i915_drm.h
tools/include/uapi/linux/if_link.h
tools/include/uapi/linux/kvm.h
tools/objtool/check.c
tools/objtool/check.h
tools/perf/Documentation/perf-data.txt
tools/perf/arch/s390/Makefile
tools/perf/arch/s390/entry/syscalls/mksyscalltbl
tools/perf/arch/s390/entry/syscalls/syscall.tbl [new file with mode: 0644]
tools/perf/builtin-c2c.c
tools/perf/builtin-report.c
tools/perf/builtin-top.c
tools/perf/check-headers.sh
tools/perf/pmu-events/arch/arm64/cortex-a53/branch.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/bus.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/cache.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/memory.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/other.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/cortex-a53/pipeline.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/mapfile.csv
tools/perf/tests/backward-ring-buffer.c
tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
tools/perf/ui/browsers/hists.c
tools/perf/ui/browsers/hists.h
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/hist.h
tools/perf/util/mmap.c
tools/perf/util/mmap.h
tools/perf/util/util.c
tools/testing/selftests/powerpc/alignment/alignment_handler.c
tools/testing/selftests/x86/Makefile
tools/testing/selftests/x86/mpx-mini-test.c
tools/testing/selftests/x86/protection_keys.c
tools/testing/selftests/x86/single_step_syscall.c
tools/testing/selftests/x86/test_mremap_vdso.c
tools/testing/selftests/x86/test_vdso.c
tools/testing/selftests/x86/test_vsyscall.c

diff --git a/Documentation/ABI/testing/sysfs-devices-platform-dock b/Documentation/ABI/testing/sysfs-devices-platform-dock
new file mode 100644 (file)
index 0000000..1d8c18f
--- /dev/null
@@ -0,0 +1,39 @@
+What:          /sys/devices/platform/dock.N/docked
+Date:          Dec, 2006
+KernelVersion: 2.6.19
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Value 1 or 0 indicates whether the software believes the
+               laptop is docked in a docking station.
+
+What:          /sys/devices/platform/dock.N/undock
+Date:          Dec, 2006
+KernelVersion: 2.6.19
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (WO) Writing to this file causes the software to initiate an
+               undock request to the firmware.
+
+What:          /sys/devices/platform/dock.N/uid
+Date:          Feb, 2007
+KernelVersion: v2.6.21
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Displays the docking station the laptop is docked to.
+
+What:          /sys/devices/platform/dock.N/flags
+Date:          May, 2007
+KernelVersion: v2.6.21
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Show dock station flags, useful for checking if undock
+               request has been made by the user (from the immediate_undock
+               option).
+
+What:          /sys/devices/platform/dock.N/type
+Date:          Aug, 2008
+KernelVersion: v2.6.27
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Display the dock station type: dock_station, ata_bay or
+               battery_bay.
index bfd29bc8d37af1bbc4913deee9d941b1692bedda..4ed63b6cfb155cbc8b5aaab64e1030662f19411f 100644 (file)
@@ -108,6 +108,8 @@ Description:        CPU topology files that describe a logical CPU's relationship
 
 What:          /sys/devices/system/cpu/cpuidle/current_driver
                /sys/devices/system/cpu/cpuidle/current_governor_ro
+               /sys/devices/system/cpu/cpuidle/available_governors
+               /sys/devices/system/cpu/cpuidle/current_governor
 Date:          September 2007
 Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   Discover cpuidle policy and mechanism
@@ -119,13 +121,84 @@ Description:      Discover cpuidle policy and mechanism
                Idle policy (governor) is differentiated from idle mechanism
                (driver)
 
-               current_driver: displays current idle mechanism
+               current_driver: (RO) displays current idle mechanism
 
-               current_governor_ro: displays current idle policy
+               current_governor_ro: (RO) displays current idle policy
+
+               With the cpuidle_sysfs_switch boot option enabled (meant for
+               developer testing), the following three attributes are visible
+               instead:
+
+               current_driver: same as described above
+
+               available_governors: (RO) displays a space separated list of
+               available governors
+
+               current_governor: (RW) displays current idle policy. Users can
+               switch the governor at runtime by writing to this file.
 
                See files in Documentation/cpuidle/ for more information.
 
 
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/name
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/latency
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/power
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/time
+               /sys/devices/system/cpu/cpuX/cpuidle/stateN/usage
+Date:          September 2007
+KernelVersion: v2.6.24
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               The directory /sys/devices/system/cpu/cpuX/cpuidle contains per
+               logical CPU specific cpuidle information for each online cpu X.
+               The processor idle states which are available for use have the
+               following attributes:
+
+               name: (RO) Name of the idle state (string).
+
+               latency: (RO) The latency to exit out of this idle state (in
+               microseconds).
+
+               power: (RO) The power consumed while in this idle state (in
+               milliwatts).
+
+               time: (RO) The total time spent in this idle state (in microseconds).
+
+               usage: (RO) Number of times this state was entered (a count).
+
+
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/desc
+Date:          February 2008
+KernelVersion: v2.6.25
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               (RO) A small description about the idle state (string).
+
+
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/disable
+Date:          March 2012
+KernelVersion: v3.10
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               (RW) Option to disable this idle state (bool). The behavior and
+               the effect of the disable variable depends on the implementation
+               of a particular governor. In the ladder governor, for example,
+               it is not coherent, i.e. if one is disabling a light state, then
+               all deeper states are disabled as well, but the disable variable
+               does not reflect it. Likewise, if one enables a deep state but a
+               lighter state still is disabled, then this has no effect.
+
+
+What:          /sys/devices/system/cpu/cpuX/cpuidle/stateN/residency
+Date:          March 2014
+KernelVersion: v3.15
+Contact:       Linux power management list <linux-pm@vger.kernel.org>
+Description:
+               (RO) Display the target residency i.e. the minimum amount of
+               time (in microseconds) this cpu should spend in this idle state
+               to make the transition worth the effort.
+
+
 What:          /sys/devices/system/cpu/cpu#/cpufreq/*
 Date:          pre-git history
 Contact:       linux-pm@vger.kernel.org
diff --git a/Documentation/ABI/testing/sysfs-platform-dptf b/Documentation/ABI/testing/sysfs-platform-dptf
new file mode 100644 (file)
index 0000000..325dc06
--- /dev/null
@@ -0,0 +1,40 @@
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/charger_type
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) The charger type - Traditional, Hybrid or NVDC.
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/adapter_rating_mw
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Adapter rating in milliwatts (the maximum Adapter power).
+               Must be 0 if no AC Adaptor is plugged in.
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/max_platform_power_mw
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Maximum platform power that can be supported by the battery
+               in milliwatts.
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/platform_power_source
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) Display the platform power source
+               0x00 = DC
+               0x01 = AC
+               0x02 = USB
+               0x03 = Wireless Charger
+
+What:          /sys/bus/platform/devices/INT3407:00/dptf_power/battery_steady_power
+Date:          Jul, 2016
+KernelVersion: v4.10
+Contact:       linux-acpi@vger.kernel.org
+Description:
+               (RO) The maximum sustained power for battery in milliwatts.
index 5550bfdcce5f1cfaab57728f4c272e9e84902860..be70b32c95d918066ffa72dfa4a69e8b4e51a225 100644 (file)
@@ -58,7 +58,12 @@ Like with atomic_t, the rule of thumb is:
 
  - RMW operations that have a return value are fully ordered.
 
-Except for test_and_set_bit_lock() which has ACQUIRE semantics and
+ - RMW operations that are conditional are unordered on FAILURE,
+   otherwise the above rules apply. In the case of test_and_{}_bit() operations,
+   if the bit in memory is unchanged by the operation then it is deemed to have
+   failed.
+
+Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
 clear_bit_unlock() which has RELEASE semantics.
 
 Since a platform only has a single means of achieving atomic operations
diff --git a/Documentation/devicetree/bindings/power/mti,mips-cpc.txt b/Documentation/devicetree/bindings/power/mti,mips-cpc.txt
new file mode 100644 (file)
index 0000000..c6b8251
--- /dev/null
@@ -0,0 +1,8 @@
+Binding for MIPS Cluster Power Controller (CPC).
+
+This binding allows a system to specify where the CPC registers are
+located.
+
+Required properties:
+compatible : Should be "mti,mips-cpc".
+regs: Should describe the address & size of the CPC register region.
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
new file mode 100644 (file)
index 0000000..2c815a7
--- /dev/null
@@ -0,0 +1,62 @@
+#
+# Feature name:          membarrier-sync-core
+#         Kconfig:       ARCH_HAS_MEMBARRIER_SYNC_CORE
+#         description:   arch supports core serializing membarrier
+#
+# Architecture requirements
+#
+# * arm64
+#
+# Rely on eret context synchronization when returning from IPI handler, and
+# when returning to user-space.
+#
+# * x86
+#
+# x86-32 uses IRET as return from interrupt, which takes care of the IPI.
+# However, it uses both IRET and SYSEXIT to go back to user-space. The IRET
+# instruction is core serializing, but not SYSEXIT.
+#
+# x86-64 uses IRET as return from interrupt, which takes care of the IPI.
+# However, it can return to user-space through either SYSRETL (compat code),
+# SYSRETQ, or IRET.
+#
+# Given that neither SYSRET{L,Q}, nor SYSEXIT, are core serializing, we rely
+# instead on write_cr3() performed by switch_mm() to provide core serialization
+# after changing the current mm, and deal with the special case of kthread ->
+# uthread (temporarily keeping current mm into active_mm) by issuing a
+# sync_core_before_usermode() in that specific case.
+#
+    -----------------------
+    |         arch |status|
+    -----------------------
+    |       alpha: | TODO |
+    |         arc: | TODO |
+    |         arm: | TODO |
+    |       arm64: |  ok  |
+    |    blackfin: | TODO |
+    |         c6x: | TODO |
+    |        cris: | TODO |
+    |         frv: | TODO |
+    |       h8300: | TODO |
+    |     hexagon: | TODO |
+    |        ia64: | TODO |
+    |        m32r: | TODO |
+    |        m68k: | TODO |
+    |       metag: | TODO |
+    |  microblaze: | TODO |
+    |        mips: | TODO |
+    |     mn10300: | TODO |
+    |       nios2: | TODO |
+    |    openrisc: | TODO |
+    |      parisc: | TODO |
+    |     powerpc: | TODO |
+    |        s390: | TODO |
+    |       score: | TODO |
+    |          sh: | TODO |
+    |       sparc: | TODO |
+    |        tile: | TODO |
+    |          um: | TODO |
+    |   unicore32: | TODO |
+    |         x86: |  ok  |
+    |      xtensa: | TODO |
+    -----------------------
index 60c482df1a38db2b300952832095ef41e2b8e655..818aca19612f4a763c96ee2eb1fbf8b3cb452e9e 100644 (file)
@@ -21,37 +21,23 @@ Implementation
 --------------
 
 Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use a three
-state atomic counter (->count) to represent the different possible
-transitions that can occur during the lifetime of a lock:
-
-         1: unlocked
-         0: locked, no waiters
-   negative: locked, with potential waiters
-
-In its most basic form it also includes a wait-queue and a spinlock
-that serializes access to it. CONFIG_SMP systems can also include
-a pointer to the lock task owner (->owner) as well as a spinner MCS
-lock (->osq), both described below in (ii).
+and implemented in kernel/locking/mutex.c. These locks use an atomic variable
+(->owner) to keep track of the lock state during its lifetime.  Field owner
+actually contains 'struct task_struct *' to the current lock owner and it is
+therefore NULL if not currently owned. Since task_struct pointers are aligned
+to at least L1_CACHE_BYTES, low bits (3) are used to store extra state (e.g.,
+if waiter list is non-empty).  In its most basic form it also includes a
+wait-queue and a spinlock that serializes access to it. Furthermore,
+CONFIG_MUTEX_SPIN_ON_OWNER=y systems use a spinner MCS lock (->osq), described
+below in (ii).
 
 When acquiring a mutex, there are three possible paths that can be
 taken, depending on the state of the lock:
 
-(i) fastpath: tries to atomically acquire the lock by decrementing the
-    counter. If it was already taken by another task it goes to the next
-    possible path. This logic is architecture specific. On x86-64, the
-    locking fastpath is 2 instructions:
-
-    0000000000000e10 <mutex_lock>:
-    e21:   f0 ff 0b                lock decl (%rbx)
-    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
-
-   the unlocking fastpath is equally tight:
-
-    0000000000000bc0 <mutex_unlock>:
-    bc8:   f0 ff 07                lock incl (%rdi)
-    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
-
+(i) fastpath: tries to atomically acquire the lock by cmpxchg()ing the owner with
+    the current task. This only works in the uncontended case (cmpxchg() checks
+    against 0UL, so all 3 state bits above have to be 0). If the lock is
+    contended it goes to the next possible path.
 
 (ii) midpath: aka optimistic spinning, tries to spin for acquisition
      while the lock owner is running and there are no other tasks ready
@@ -143,11 +129,10 @@ Test if the mutex is taken:
 Disadvantages
 -------------
 
-Unlike its original design and purpose, 'struct mutex' is larger than
-most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
-as large as 'struct semaphore' (24 bytes) and tied, along with rwsems,
-for the largest lock in the kernel. Larger structure sizes mean more
-CPU cache and memory footprint.
+Unlike its original design and purpose, 'struct mutex' is among the largest
+locks in the kernel. E.g: on x86-64 it is 32 bytes, where 'struct semaphore'
+is 24 bytes and rw_semaphore is 40 bytes. Larger structure sizes mean more CPU
+cache and memory footprint.
 
 When to use mutexes
 -------------------
index fc3427d11d75a62a35101414932ca0fd6ce181c8..ce3519e1c18c76dfb0eee38b6890e40281d395ab 100644 (file)
@@ -9205,6 +9205,7 @@ MIPS GENERIC PLATFORM
 M:     Paul Burton <paul.burton@mips.com>
 L:     linux-mips@linux-mips.org
 S:     Supported
+F:     Documentation/devicetree/bindings/power/mti,mips-cpc.txt
 F:     arch/mips/generic/
 F:     arch/mips/tools/generic-board-config.sh
 
@@ -9944,6 +9945,7 @@ F:        drivers/nfc/nxp-nci
 
 OBJTOOL
 M:     Josh Poimboeuf <jpoimboe@redhat.com>
+M:     Peter Zijlstra <peterz@infradead.org>
 S:     Supported
 F:     tools/objtool/
 
index 79ad2bfa24b68f279af011f82e87f16d64d1440d..d9cf3a40eda9d20ce03ceda2ebc921a95dc2aea7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 4
 PATCHLEVEL = 16
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
 NAME = Fearless Coyote
 
 # *DOCUMENTATION*
index be7bd19c87ec23949c4dcbdfe8bbd4972dbae00a..eda8c5f629fc8553af2cdccaede5dfd209cecf94 100644 (file)
@@ -20,7 +20,7 @@
 
 #define MPIDR_UP_BITMASK       (0x1 << 30)
 #define MPIDR_MT_BITMASK       (0x1 << 24)
-#define MPIDR_HWID_BITMASK     0xff00ffffff
+#define MPIDR_HWID_BITMASK     0xff00ffffffUL
 
 #define MPIDR_LEVEL_BITS_SHIFT 3
 #define MPIDR_LEVEL_BITS       (1 << MPIDR_LEVEL_BITS_SHIFT)
index 1dca41bea16ad61fc8fe6f2be528ba452bffdf27..e73f6856962461952287b244831395200cdc3853 100644 (file)
@@ -22,7 +22,7 @@
 
 static inline pte_t huge_ptep_get(pte_t *ptep)
 {
-       return *ptep;
+       return READ_ONCE(*ptep);
 }
 
 
index 9679067a15746ce921ba5a138c6508c0a4972d0a..7faed6e48b46212709485b7225c512f3fb99831e 100644 (file)
@@ -185,42 +185,42 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
        return pmd;
 }
 
-static inline void kvm_set_s2pte_readonly(pte_t *pte)
+static inline void kvm_set_s2pte_readonly(pte_t *ptep)
 {
        pteval_t old_pteval, pteval;
 
-       pteval = READ_ONCE(pte_val(*pte));
+       pteval = READ_ONCE(pte_val(*ptep));
        do {
                old_pteval = pteval;
                pteval &= ~PTE_S2_RDWR;
                pteval |= PTE_S2_RDONLY;
-               pteval = cmpxchg_relaxed(&pte_val(*pte), old_pteval, pteval);
+               pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval);
        } while (pteval != old_pteval);
 }
 
-static inline bool kvm_s2pte_readonly(pte_t *pte)
+static inline bool kvm_s2pte_readonly(pte_t *ptep)
 {
-       return (pte_val(*pte) & PTE_S2_RDWR) == PTE_S2_RDONLY;
+       return (READ_ONCE(pte_val(*ptep)) & PTE_S2_RDWR) == PTE_S2_RDONLY;
 }
 
-static inline bool kvm_s2pte_exec(pte_t *pte)
+static inline bool kvm_s2pte_exec(pte_t *ptep)
 {
-       return !(pte_val(*pte) & PTE_S2_XN);
+       return !(READ_ONCE(pte_val(*ptep)) & PTE_S2_XN);
 }
 
-static inline void kvm_set_s2pmd_readonly(pmd_t *pmd)
+static inline void kvm_set_s2pmd_readonly(pmd_t *pmdp)
 {
-       kvm_set_s2pte_readonly((pte_t *)pmd);
+       kvm_set_s2pte_readonly((pte_t *)pmdp);
 }
 
-static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
+static inline bool kvm_s2pmd_readonly(pmd_t *pmdp)
 {
-       return kvm_s2pte_readonly((pte_t *)pmd);
+       return kvm_s2pte_readonly((pte_t *)pmdp);
 }
 
-static inline bool kvm_s2pmd_exec(pmd_t *pmd)
+static inline bool kvm_s2pmd_exec(pmd_t *pmdp)
 {
-       return !(pmd_val(*pmd) & PMD_S2_XN);
+       return !(READ_ONCE(pmd_val(*pmdp)) & PMD_S2_XN);
 }
 
 static inline bool kvm_page_empty(void *ptr)
index 8d3331985d2e34b2099eab6cec8b456d40983052..39ec0b8a689eea3e495029685bed047737d64c5e 100644 (file)
@@ -141,13 +141,13 @@ static inline void cpu_install_idmap(void)
  * Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
  * avoiding the possibility of conflicting TLB entries being allocated.
  */
-static inline void cpu_replace_ttbr1(pgd_t *pgd)
+static inline void cpu_replace_ttbr1(pgd_t *pgdp)
 {
        typedef void (ttbr_replace_func)(phys_addr_t);
        extern ttbr_replace_func idmap_cpu_replace_ttbr1;
        ttbr_replace_func *replace_phys;
 
-       phys_addr_t pgd_phys = virt_to_phys(pgd);
+       phys_addr_t pgd_phys = virt_to_phys(pgdp);
 
        replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
 
index e9d9f1b006efec5708fd0f33c006d2e017711a4d..2e05bcd944c8395b9fd5af993b6214054352aefc 100644 (file)
@@ -36,23 +36,23 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
        return (pmd_t *)__get_free_page(PGALLOC_GFP);
 }
 
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
 {
-       BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
-       free_page((unsigned long)pmd);
+       BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
+       free_page((unsigned long)pmdp);
 }
 
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 {
-       set_pud(pud, __pud(__phys_to_pud_val(pmd) | prot));
+       set_pud(pudp, __pud(__phys_to_pud_val(pmdp) | prot));
 }
 
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp)
 {
-       __pud_populate(pud, __pa(pmd), PMD_TYPE_TABLE);
+       __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE);
 }
 #else
-static inline void __pud_populate(pud_t *pud, phys_addr_t pmd, pudval_t prot)
+static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot)
 {
        BUILD_BUG();
 }
@@ -65,30 +65,30 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
        return (pud_t *)__get_free_page(PGALLOC_GFP);
 }
 
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
 {
-       BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-       free_page((unsigned long)pud);
+       BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
+       free_page((unsigned long)pudp);
 }
 
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
 {
-       set_pgd(pgdp, __pgd(__phys_to_pgd_val(pud) | prot));
+       set_pgd(pgdp, __pgd(__phys_to_pgd_val(pudp) | prot));
 }
 
-static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
+static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgdp, pud_t *pudp)
 {
-       __pgd_populate(pgd, __pa(pud), PUD_TYPE_TABLE);
+       __pgd_populate(pgdp, __pa(pudp), PUD_TYPE_TABLE);
 }
 #else
-static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pud, pgdval_t prot)
+static inline void __pgd_populate(pgd_t *pgdp, phys_addr_t pudp, pgdval_t prot)
 {
        BUILD_BUG();
 }
 #endif /* CONFIG_PGTABLE_LEVELS > 3 */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
+extern void pgd_free(struct mm_struct *mm, pgd_t *pgdp);
 
 static inline pte_t *
 pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr)
@@ -114,10 +114,10 @@ pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 /*
  * Free a PTE table.
  */
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *ptep)
 {
-       if (pte)
-               free_page((unsigned long)pte);
+       if (ptep)
+               free_page((unsigned long)ptep);
 }
 
 static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
@@ -126,10 +126,10 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
        __free_page(pte);
 }
 
-static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t pte,
+static inline void __pmd_populate(pmd_t *pmdp, phys_addr_t ptep,
                                  pmdval_t prot)
 {
-       set_pmd(pmdp, __pmd(__phys_to_pmd_val(pte) | prot));
+       set_pmd(pmdp, __pmd(__phys_to_pmd_val(ptep) | prot));
 }
 
 /*
index 094374c82db088816d6a35ec75e1658dfc446ec4..7e2c27e63cd894371655a569046faaa67cfc1837 100644 (file)
@@ -218,7 +218,7 @@ static inline pmd_t pmd_mkcont(pmd_t pmd)
 
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
-       *ptep = pte;
+       WRITE_ONCE(*ptep, pte);
 
        /*
         * Only if the new pte is valid and kernel, otherwise TLB maintenance
@@ -250,6 +250,8 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                              pte_t *ptep, pte_t pte)
 {
+       pte_t old_pte;
+
        if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
                __sync_icache_dcache(pte, addr);
 
@@ -258,14 +260,15 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
         * hardware updates of the pte (ptep_set_access_flags safely changes
         * valid ptes without going through an invalid entry).
         */
-       if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(*ptep) && pte_valid(pte) &&
+       old_pte = READ_ONCE(*ptep);
+       if (IS_ENABLED(CONFIG_DEBUG_VM) && pte_valid(old_pte) && pte_valid(pte) &&
           (mm == current->active_mm || atomic_read(&mm->mm_users) > 1)) {
                VM_WARN_ONCE(!pte_young(pte),
                             "%s: racy access flag clearing: 0x%016llx -> 0x%016llx",
-                            __func__, pte_val(*ptep), pte_val(pte));
-               VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte),
+                            __func__, pte_val(old_pte), pte_val(pte));
+               VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte),
                             "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx",
-                            __func__, pte_val(*ptep), pte_val(pte));
+                            __func__, pte_val(old_pte), pte_val(pte));
        }
 
        set_pte(ptep, pte);
@@ -431,7 +434,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
-       *pmdp = pmd;
+       WRITE_ONCE(*pmdp, pmd);
        dsb(ishst);
        isb();
 }
@@ -482,7 +485,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 
 static inline void set_pud(pud_t *pudp, pud_t pud)
 {
-       *pudp = pud;
+       WRITE_ONCE(*pudp, pud);
        dsb(ishst);
        isb();
 }
@@ -500,7 +503,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 /* Find an entry in the second-level page table. */
 #define pmd_index(addr)                (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
 
-#define pmd_offset_phys(dir, addr)     (pud_page_paddr(*(dir)) + pmd_index(addr) * sizeof(pmd_t))
+#define pmd_offset_phys(dir, addr)     (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
 #define pmd_offset(dir, addr)          ((pmd_t *)__va(pmd_offset_phys((dir), (addr))))
 
 #define pmd_set_fixmap(addr)           ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
@@ -535,7 +538,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud)
 
 static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
-       *pgdp = pgd;
+       WRITE_ONCE(*pgdp, pgd);
        dsb(ishst);
 }
 
@@ -552,7 +555,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
 /* Find an entry in the frst-level page table. */
 #define pud_index(addr)                (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
 
-#define pud_offset_phys(dir, addr)     (pgd_page_paddr(*(dir)) + pud_index(addr) * sizeof(pud_t))
+#define pud_offset_phys(dir, addr)     (pgd_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
 #define pud_offset(dir, addr)          ((pud_t *)__va(pud_offset_phys((dir), (addr))))
 
 #define pud_set_fixmap(addr)           ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
index 07823595b7f01690823da724584965bca0872588..52f15cd896e11ad631ac3092d9709337a9629bb4 100644 (file)
@@ -406,6 +406,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
                MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
        },
+       {
+               .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
+               MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+               .enable = qcom_enable_link_stack_sanitization,
+       },
+       {
+               .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT,
+               MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
+       },
        {
                .capability = ARM64_HARDEN_BRANCH_PREDICTOR,
                MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
index f85ac58d08a35676f38fa2a6d7b0887fb6f1d2ec..a8bf1c892b9065ca40ed4b263317deced2b8d693 100644 (file)
@@ -90,7 +90,7 @@ static int __init set_permissions(pte_t *ptep, pgtable_t token,
                                  unsigned long addr, void *data)
 {
        efi_memory_desc_t *md = data;
-       pte_t pte = *ptep;
+       pte_t pte = READ_ONCE(*ptep);
 
        if (md->attribute & EFI_MEMORY_RO)
                pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
index f20cf7e992495adffcd2049c4c40f9ec6798c99f..1ec5f28c39fc56c4aae85cc5801bd513cc3ea2c3 100644 (file)
@@ -202,10 +202,10 @@ static int create_safe_exec_page(void *src_start, size_t length,
                                 gfp_t mask)
 {
        int rc = 0;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep;
        unsigned long dst = (unsigned long)allocator(mask);
 
        if (!dst) {
@@ -216,38 +216,38 @@ static int create_safe_exec_page(void *src_start, size_t length,
        memcpy((void *)dst, src_start, length);
        flush_icache_range(dst, dst + length);
 
-       pgd = pgd_offset_raw(allocator(mask), dst_addr);
-       if (pgd_none(*pgd)) {
-               pud = allocator(mask);
-               if (!pud) {
+       pgdp = pgd_offset_raw(allocator(mask), dst_addr);
+       if (pgd_none(READ_ONCE(*pgdp))) {
+               pudp = allocator(mask);
+               if (!pudp) {
                        rc = -ENOMEM;
                        goto out;
                }
-               pgd_populate(&init_mm, pgd, pud);
+               pgd_populate(&init_mm, pgdp, pudp);
        }
 
-       pud = pud_offset(pgd, dst_addr);
-       if (pud_none(*pud)) {
-               pmd = allocator(mask);
-               if (!pmd) {
+       pudp = pud_offset(pgdp, dst_addr);
+       if (pud_none(READ_ONCE(*pudp))) {
+               pmdp = allocator(mask);
+               if (!pmdp) {
                        rc = -ENOMEM;
                        goto out;
                }
-               pud_populate(&init_mm, pud, pmd);
+               pud_populate(&init_mm, pudp, pmdp);
        }
 
-       pmd = pmd_offset(pud, dst_addr);
-       if (pmd_none(*pmd)) {
-               pte = allocator(mask);
-               if (!pte) {
+       pmdp = pmd_offset(pudp, dst_addr);
+       if (pmd_none(READ_ONCE(*pmdp))) {
+               ptep = allocator(mask);
+               if (!ptep) {
                        rc = -ENOMEM;
                        goto out;
                }
-               pmd_populate_kernel(&init_mm, pmd, pte);
+               pmd_populate_kernel(&init_mm, pmdp, ptep);
        }
 
-       pte = pte_offset_kernel(pmd, dst_addr);
-       set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
+       ptep = pte_offset_kernel(pmdp, dst_addr);
+       set_pte(ptep, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC));
 
        /*
         * Load our new page tables. A strict BBM approach requires that we
@@ -263,7 +263,7 @@ static int create_safe_exec_page(void *src_start, size_t length,
         */
        cpu_set_reserved_ttbr0();
        local_flush_tlb_all();
-       write_sysreg(phys_to_ttbr(virt_to_phys(pgd)), ttbr0_el1);
+       write_sysreg(phys_to_ttbr(virt_to_phys(pgdp)), ttbr0_el1);
        isb();
 
        *phys_dst_addr = virt_to_phys((void *)dst);
@@ -320,9 +320,9 @@ int swsusp_arch_suspend(void)
        return ret;
 }
 
-static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
+static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
 {
-       pte_t pte = *src_pte;
+       pte_t pte = READ_ONCE(*src_ptep);
 
        if (pte_valid(pte)) {
                /*
@@ -330,7 +330,7 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
                 * read only (code, rodata). Clear the RDONLY bit from
                 * the temporary mappings we use during restore.
                 */
-               set_pte(dst_pte, pte_mkwrite(pte));
+               set_pte(dst_ptep, pte_mkwrite(pte));
        } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
                /*
                 * debug_pagealloc will removed the PTE_VALID bit if
@@ -343,112 +343,116 @@ static void _copy_pte(pte_t *dst_pte, pte_t *src_pte, unsigned long addr)
                 */
                BUG_ON(!pfn_valid(pte_pfn(pte)));
 
-               set_pte(dst_pte, pte_mkpresent(pte_mkwrite(pte)));
+               set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
        }
 }
 
-static int copy_pte(pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long start,
+static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
                    unsigned long end)
 {
-       pte_t *src_pte;
-       pte_t *dst_pte;
+       pte_t *src_ptep;
+       pte_t *dst_ptep;
        unsigned long addr = start;
 
-       dst_pte = (pte_t *)get_safe_page(GFP_ATOMIC);
-       if (!dst_pte)
+       dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
+       if (!dst_ptep)
                return -ENOMEM;
-       pmd_populate_kernel(&init_mm, dst_pmd, dst_pte);
-       dst_pte = pte_offset_kernel(dst_pmd, start);
+       pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
+       dst_ptep = pte_offset_kernel(dst_pmdp, start);
 
-       src_pte = pte_offset_kernel(src_pmd, start);
+       src_ptep = pte_offset_kernel(src_pmdp, start);
        do {
-               _copy_pte(dst_pte, src_pte, addr);
-       } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
+               _copy_pte(dst_ptep, src_ptep, addr);
+       } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
 
        return 0;
 }
 
-static int copy_pmd(pud_t *dst_pud, pud_t *src_pud, unsigned long start,
+static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
                    unsigned long end)
 {
-       pmd_t *src_pmd;
-       pmd_t *dst_pmd;
+       pmd_t *src_pmdp;
+       pmd_t *dst_pmdp;
        unsigned long next;
        unsigned long addr = start;
 
-       if (pud_none(*dst_pud)) {
-               dst_pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
-               if (!dst_pmd)
+       if (pud_none(READ_ONCE(*dst_pudp))) {
+               dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
+               if (!dst_pmdp)
                        return -ENOMEM;
-               pud_populate(&init_mm, dst_pud, dst_pmd);
+               pud_populate(&init_mm, dst_pudp, dst_pmdp);
        }
-       dst_pmd = pmd_offset(dst_pud, start);
+       dst_pmdp = pmd_offset(dst_pudp, start);
 
-       src_pmd = pmd_offset(src_pud, start);
+       src_pmdp = pmd_offset(src_pudp, start);
        do {
+               pmd_t pmd = READ_ONCE(*src_pmdp);
+
                next = pmd_addr_end(addr, end);
-               if (pmd_none(*src_pmd))
+               if (pmd_none(pmd))
                        continue;
-               if (pmd_table(*src_pmd)) {
-                       if (copy_pte(dst_pmd, src_pmd, addr, next))
+               if (pmd_table(pmd)) {
+                       if (copy_pte(dst_pmdp, src_pmdp, addr, next))
                                return -ENOMEM;
                } else {
-                       set_pmd(dst_pmd,
-                               __pmd(pmd_val(*src_pmd) & ~PMD_SECT_RDONLY));
+                       set_pmd(dst_pmdp,
+                               __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
                }
-       } while (dst_pmd++, src_pmd++, addr = next, addr != end);
+       } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
 
        return 0;
 }
 
-static int copy_pud(pgd_t *dst_pgd, pgd_t *src_pgd, unsigned long start,
+static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
                    unsigned long end)
 {
-       pud_t *dst_pud;
-       pud_t *src_pud;
+       pud_t *dst_pudp;
+       pud_t *src_pudp;
        unsigned long next;
        unsigned long addr = start;
 
-       if (pgd_none(*dst_pgd)) {
-               dst_pud = (pud_t *)get_safe_page(GFP_ATOMIC);
-               if (!dst_pud)
+       if (pgd_none(READ_ONCE(*dst_pgdp))) {
+               dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
+               if (!dst_pudp)
                        return -ENOMEM;
-               pgd_populate(&init_mm, dst_pgd, dst_pud);
+               pgd_populate(&init_mm, dst_pgdp, dst_pudp);
        }
-       dst_pud = pud_offset(dst_pgd, start);
+       dst_pudp = pud_offset(dst_pgdp, start);
 
-       src_pud = pud_offset(src_pgd, start);
+       src_pudp = pud_offset(src_pgdp, start);
        do {
+               pud_t pud = READ_ONCE(*src_pudp);
+
                next = pud_addr_end(addr, end);
-               if (pud_none(*src_pud))
+               if (pud_none(pud))
                        continue;
-               if (pud_table(*(src_pud))) {
-                       if (copy_pmd(dst_pud, src_pud, addr, next))
+               if (pud_table(pud)) {
+                       if (copy_pmd(dst_pudp, src_pudp, addr, next))
                                return -ENOMEM;
                } else {
-                       set_pud(dst_pud,
-                               __pud(pud_val(*src_pud) & ~PMD_SECT_RDONLY));
+                       set_pud(dst_pudp,
+                               __pud(pud_val(pud) & ~PMD_SECT_RDONLY));
                }
-       } while (dst_pud++, src_pud++, addr = next, addr != end);
+       } while (dst_pudp++, src_pudp++, addr = next, addr != end);
 
        return 0;
 }
 
-static int copy_page_tables(pgd_t *dst_pgd, unsigned long start,
+static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
                            unsigned long end)
 {
        unsigned long next;
        unsigned long addr = start;
-       pgd_t *src_pgd = pgd_offset_k(start);
+       pgd_t *src_pgdp = pgd_offset_k(start);
 
-       dst_pgd = pgd_offset_raw(dst_pgd, start);
+       dst_pgdp = pgd_offset_raw(dst_pgdp, start);
        do {
                next = pgd_addr_end(addr, end);
-               if (pgd_none(*src_pgd))
+               if (pgd_none(READ_ONCE(*src_pgdp)))
                        continue;
-               if (copy_pud(dst_pgd, src_pgd, addr, next))
+               if (copy_pud(dst_pgdp, src_pgdp, addr, next))
                        return -ENOMEM;
-       } while (dst_pgd++, src_pgd++, addr = next, addr != end);
+       } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
 
        return 0;
 }
index 116252a8d3a5507295ed30abfd740e1cc07446ae..870f4b1587f97496c3fd427fe6b652bae1a96cd7 100644 (file)
@@ -407,8 +407,10 @@ again:
                u32 midr = read_cpuid_id();
 
                /* Apply BTAC predictors mitigation to all Falkor chips */
-               if ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)
+               if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
+                   ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) {
                        __qcom_hyp_sanitize_btac_predictors();
+               }
        }
 
        fp_enabled = __fpsimd_enabled();
index 7b60d62ac5939e83c8e153ec1c3a0447565f23eb..65dfc8571bf8397c3f2a6297d21b5112794461e1 100644 (file)
@@ -286,48 +286,52 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level,
 
 }
 
-static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+static void walk_pte(struct pg_state *st, pmd_t *pmdp, unsigned long start)
 {
-       pte_t *pte = pte_offset_kernel(pmd, 0UL);
+       pte_t *ptep = pte_offset_kernel(pmdp, 0UL);
        unsigned long addr;
        unsigned i;
 
-       for (i = 0; i < PTRS_PER_PTE; i++, pte++) {
+       for (i = 0; i < PTRS_PER_PTE; i++, ptep++) {
                addr = start + i * PAGE_SIZE;
-               note_page(st, addr, 4, pte_val(*pte));
+               note_page(st, addr, 4, READ_ONCE(pte_val(*ptep)));
        }
 }
 
-static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+static void walk_pmd(struct pg_state *st, pud_t *pudp, unsigned long start)
 {
-       pmd_t *pmd = pmd_offset(pud, 0UL);
+       pmd_t *pmdp = pmd_offset(pudp, 0UL);
        unsigned long addr;
        unsigned i;
 
-       for (i = 0; i < PTRS_PER_PMD; i++, pmd++) {
+       for (i = 0; i < PTRS_PER_PMD; i++, pmdp++) {
+               pmd_t pmd = READ_ONCE(*pmdp);
+
                addr = start + i * PMD_SIZE;
-               if (pmd_none(*pmd) || pmd_sect(*pmd)) {
-                       note_page(st, addr, 3, pmd_val(*pmd));
+               if (pmd_none(pmd) || pmd_sect(pmd)) {
+                       note_page(st, addr, 3, pmd_val(pmd));
                } else {
-                       BUG_ON(pmd_bad(*pmd));
-                       walk_pte(st, pmd, addr);
+                       BUG_ON(pmd_bad(pmd));
+                       walk_pte(st, pmdp, addr);
                }
        }
 }
 
-static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
+static void walk_pud(struct pg_state *st, pgd_t *pgdp, unsigned long start)
 {
-       pud_t *pud = pud_offset(pgd, 0UL);
+       pud_t *pudp = pud_offset(pgdp, 0UL);
        unsigned long addr;
        unsigned i;
 
-       for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
+       for (i = 0; i < PTRS_PER_PUD; i++, pudp++) {
+               pud_t pud = READ_ONCE(*pudp);
+
                addr = start + i * PUD_SIZE;
-               if (pud_none(*pud) || pud_sect(*pud)) {
-                       note_page(st, addr, 2, pud_val(*pud));
+               if (pud_none(pud) || pud_sect(pud)) {
+                       note_page(st, addr, 2, pud_val(pud));
                } else {
-                       BUG_ON(pud_bad(*pud));
-                       walk_pmd(st, pud, addr);
+                       BUG_ON(pud_bad(pud));
+                       walk_pmd(st, pudp, addr);
                }
        }
 }
@@ -335,17 +339,19 @@ static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start)
 static void walk_pgd(struct pg_state *st, struct mm_struct *mm,
                     unsigned long start)
 {
-       pgd_t *pgd = pgd_offset(mm, 0UL);
+       pgd_t *pgdp = pgd_offset(mm, 0UL);
        unsigned i;
        unsigned long addr;
 
-       for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
+       for (i = 0; i < PTRS_PER_PGD; i++, pgdp++) {
+               pgd_t pgd = READ_ONCE(*pgdp);
+
                addr = start + i * PGDIR_SIZE;
-               if (pgd_none(*pgd)) {
-                       note_page(st, addr, 1, pgd_val(*pgd));
+               if (pgd_none(pgd)) {
+                       note_page(st, addr, 1, pgd_val(pgd));
                } else {
-                       BUG_ON(pgd_bad(*pgd));
-                       walk_pud(st, pgd, addr);
+                       BUG_ON(pgd_bad(pgd));
+                       walk_pud(st, pgdp, addr);
                }
        }
 }
index f76bb2c3c9434dc29c572d4103f9eb10b42dc278..bff11553eb050306dfa9df7fec0682f6a03cbf61 100644 (file)
@@ -130,7 +130,8 @@ static void mem_abort_decode(unsigned int esr)
 void show_pte(unsigned long addr)
 {
        struct mm_struct *mm;
-       pgd_t *pgd;
+       pgd_t *pgdp;
+       pgd_t pgd;
 
        if (addr < TASK_SIZE) {
                /* TTBR0 */
@@ -149,33 +150,37 @@ void show_pte(unsigned long addr)
                return;
        }
 
-       pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n",
+       pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
                 mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
                 VA_BITS, mm->pgd);
-       pgd = pgd_offset(mm, addr);
-       pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd));
+       pgdp = pgd_offset(mm, addr);
+       pgd = READ_ONCE(*pgdp);
+       pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
 
        do {
-               pud_t *pud;
-               pmd_t *pmd;
-               pte_t *pte;
+               pud_t *pudp, pud;
+               pmd_t *pmdp, pmd;
+               pte_t *ptep, pte;
 
-               if (pgd_none(*pgd) || pgd_bad(*pgd))
+               if (pgd_none(pgd) || pgd_bad(pgd))
                        break;
 
-               pud = pud_offset(pgd, addr);
-               pr_cont(", *pud=%016llx", pud_val(*pud));
-               if (pud_none(*pud) || pud_bad(*pud))
+               pudp = pud_offset(pgdp, addr);
+               pud = READ_ONCE(*pudp);
+               pr_cont(", pud=%016llx", pud_val(pud));
+               if (pud_none(pud) || pud_bad(pud))
                        break;
 
-               pmd = pmd_offset(pud, addr);
-               pr_cont(", *pmd=%016llx", pmd_val(*pmd));
-               if (pmd_none(*pmd) || pmd_bad(*pmd))
+               pmdp = pmd_offset(pudp, addr);
+               pmd = READ_ONCE(*pmdp);
+               pr_cont(", pmd=%016llx", pmd_val(pmd));
+               if (pmd_none(pmd) || pmd_bad(pmd))
                        break;
 
-               pte = pte_offset_map(pmd, addr);
-               pr_cont(", *pte=%016llx", pte_val(*pte));
-               pte_unmap(pte);
+               ptep = pte_offset_map(pmdp, addr);
+               pte = READ_ONCE(*ptep);
+               pr_cont(", pte=%016llx", pte_val(pte));
+               pte_unmap(ptep);
        } while(0);
 
        pr_cont("\n");
@@ -196,8 +201,9 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
                          pte_t entry, int dirty)
 {
        pteval_t old_pteval, pteval;
+       pte_t pte = READ_ONCE(*ptep);
 
-       if (pte_same(*ptep, entry))
+       if (pte_same(pte, entry))
                return 0;
 
        /* only preserve the access flags and write permission */
@@ -210,7 +216,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
         * (calculated as: a & b == ~(~a | ~b)).
         */
        pte_val(entry) ^= PTE_RDONLY;
-       pteval = READ_ONCE(pte_val(*ptep));
+       pteval = pte_val(pte);
        do {
                old_pteval = pteval;
                pteval ^= PTE_RDONLY;
index 6cb0fa92a65162ecce1e84b8ef09177fcc54785d..ecc6818191df961eac49e6ca0c7d8b8d38d0c855 100644 (file)
@@ -54,14 +54,14 @@ static inline pgprot_t pte_pgprot(pte_t pte)
 static int find_num_contig(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep, size_t *pgsize)
 {
-       pgd_t *pgd = pgd_offset(mm, addr);
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp = pgd_offset(mm, addr);
+       pud_t *pudp;
+       pmd_t *pmdp;
 
        *pgsize = PAGE_SIZE;
-       pud = pud_offset(pgd, addr);
-       pmd = pmd_offset(pud, addr);
-       if ((pte_t *)pmd == ptep) {
+       pudp = pud_offset(pgdp, addr);
+       pmdp = pmd_offset(pudp, addr);
+       if ((pte_t *)pmdp == ptep) {
                *pgsize = PMD_SIZE;
                return CONT_PMDS;
        }
@@ -181,11 +181,8 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 
        clear_flush(mm, addr, ptep, pgsize, ncontig);
 
-       for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn) {
-               pr_debug("%s: set pte %p to 0x%llx\n", __func__, ptep,
-                        pte_val(pfn_pte(pfn, hugeprot)));
+       for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
                set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
-       }
 }
 
 void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
@@ -203,20 +200,20 @@ void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 pte_t *huge_pte_alloc(struct mm_struct *mm,
                      unsigned long addr, unsigned long sz)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pte_t *pte = NULL;
-
-       pr_debug("%s: addr:0x%lx sz:0x%lx\n", __func__, addr, sz);
-       pgd = pgd_offset(mm, addr);
-       pud = pud_alloc(mm, pgd, addr);
-       if (!pud)
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep = NULL;
+
+       pgdp = pgd_offset(mm, addr);
+       pudp = pud_alloc(mm, pgdp, addr);
+       if (!pudp)
                return NULL;
 
        if (sz == PUD_SIZE) {
-               pte = (pte_t *)pud;
+               ptep = (pte_t *)pudp;
        } else if (sz == (PAGE_SIZE * CONT_PTES)) {
-               pmd_t *pmd = pmd_alloc(mm, pud, addr);
+               pmdp = pmd_alloc(mm, pudp, addr);
 
                WARN_ON(addr & (sz - 1));
                /*
@@ -226,60 +223,55 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
                 * will be no pte_unmap() to correspond with this
                 * pte_alloc_map().
                 */
-               pte = pte_alloc_map(mm, pmd, addr);
+               ptep = pte_alloc_map(mm, pmdp, addr);
        } else if (sz == PMD_SIZE) {
                if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
-                   pud_none(*pud))
-                       pte = huge_pmd_share(mm, addr, pud);
+                   pud_none(READ_ONCE(*pudp)))
+                       ptep = huge_pmd_share(mm, addr, pudp);
                else
-                       pte = (pte_t *)pmd_alloc(mm, pud, addr);
+                       ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
        } else if (sz == (PMD_SIZE * CONT_PMDS)) {
-               pmd_t *pmd;
-
-               pmd = pmd_alloc(mm, pud, addr);
+               pmdp = pmd_alloc(mm, pudp, addr);
                WARN_ON(addr & (sz - 1));
-               return (pte_t *)pmd;
+               return (pte_t *)pmdp;
        }
 
-       pr_debug("%s: addr:0x%lx sz:0x%lx ret pte=%p/0x%llx\n", __func__, addr,
-              sz, pte, pte_val(*pte));
-       return pte;
+       return ptep;
 }
 
 pte_t *huge_pte_offset(struct mm_struct *mm,
                       unsigned long addr, unsigned long sz)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
 
-       pgd = pgd_offset(mm, addr);
-       pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
-       if (!pgd_present(*pgd))
+       pgdp = pgd_offset(mm, addr);
+       if (!pgd_present(READ_ONCE(*pgdp)))
                return NULL;
 
-       pud = pud_offset(pgd, addr);
-       if (sz != PUD_SIZE && pud_none(*pud))
+       pudp = pud_offset(pgdp, addr);
+       pud = READ_ONCE(*pudp);
+       if (sz != PUD_SIZE && pud_none(pud))
                return NULL;
        /* hugepage or swap? */
-       if (pud_huge(*pud) || !pud_present(*pud))
-               return (pte_t *)pud;
+       if (pud_huge(pud) || !pud_present(pud))
+               return (pte_t *)pudp;
        /* table; check the next level */
 
        if (sz == CONT_PMD_SIZE)
                addr &= CONT_PMD_MASK;
 
-       pmd = pmd_offset(pud, addr);
+       pmdp = pmd_offset(pudp, addr);
+       pmd = READ_ONCE(*pmdp);
        if (!(sz == PMD_SIZE || sz == CONT_PMD_SIZE) &&
-           pmd_none(*pmd))
+           pmd_none(pmd))
                return NULL;
-       if (pmd_huge(*pmd) || !pmd_present(*pmd))
-               return (pte_t *)pmd;
+       if (pmd_huge(pmd) || !pmd_present(pmd))
+               return (pte_t *)pmdp;
 
-       if (sz == CONT_PTE_SIZE) {
-               pte_t *pte = pte_offset_kernel(pmd, (addr & CONT_PTE_MASK));
-               return pte;
-       }
+       if (sz == CONT_PTE_SIZE)
+               return pte_offset_kernel(pmdp, (addr & CONT_PTE_MASK));
 
        return NULL;
 }
@@ -367,7 +359,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
        size_t pgsize;
        pte_t pte;
 
-       if (!pte_cont(*ptep)) {
+       if (!pte_cont(READ_ONCE(*ptep))) {
                ptep_set_wrprotect(mm, addr, ptep);
                return;
        }
@@ -391,7 +383,7 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
        size_t pgsize;
        int ncontig;
 
-       if (!pte_cont(*ptep)) {
+       if (!pte_cont(READ_ONCE(*ptep))) {
                ptep_clear_flush(vma, addr, ptep);
                return;
        }
index 6e02e6fb4c7b9e12da9796b2e8a2be68ca143ae0..dabfc1ecda3d3a9d57a430f1641eca05c1114703 100644 (file)
@@ -44,92 +44,92 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node)
        return __pa(p);
 }
 
-static pte_t *__init kasan_pte_offset(pmd_t *pmd, unsigned long addr, int node,
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node,
                                      bool early)
 {
-       if (pmd_none(*pmd)) {
+       if (pmd_none(READ_ONCE(*pmdp))) {
                phys_addr_t pte_phys = early ? __pa_symbol(kasan_zero_pte)
                                             : kasan_alloc_zeroed_page(node);
-               __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+               __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
        }
 
-       return early ? pte_offset_kimg(pmd, addr)
-                    : pte_offset_kernel(pmd, addr);
+       return early ? pte_offset_kimg(pmdp, addr)
+                    : pte_offset_kernel(pmdp, addr);
 }
 
-static pmd_t *__init kasan_pmd_offset(pud_t *pud, unsigned long addr, int node,
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node,
                                      bool early)
 {
-       if (pud_none(*pud)) {
+       if (pud_none(READ_ONCE(*pudp))) {
                phys_addr_t pmd_phys = early ? __pa_symbol(kasan_zero_pmd)
                                             : kasan_alloc_zeroed_page(node);
-               __pud_populate(pud, pmd_phys, PMD_TYPE_TABLE);
+               __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE);
        }
 
-       return early ? pmd_offset_kimg(pud, addr) : pmd_offset(pud, addr);
+       return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr);
 }
 
-static pud_t *__init kasan_pud_offset(pgd_t *pgd, unsigned long addr, int node,
+static pud_t *__init kasan_pud_offset(pgd_t *pgdp, unsigned long addr, int node,
                                      bool early)
 {
-       if (pgd_none(*pgd)) {
+       if (pgd_none(READ_ONCE(*pgdp))) {
                phys_addr_t pud_phys = early ? __pa_symbol(kasan_zero_pud)
                                             : kasan_alloc_zeroed_page(node);
-               __pgd_populate(pgd, pud_phys, PMD_TYPE_TABLE);
+               __pgd_populate(pgdp, pud_phys, PMD_TYPE_TABLE);
        }
 
-       return early ? pud_offset_kimg(pgd, addr) : pud_offset(pgd, addr);
+       return early ? pud_offset_kimg(pgdp, addr) : pud_offset(pgdp, addr);
 }
 
-static void __init kasan_pte_populate(pmd_t *pmd, unsigned long addr,
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
                                      unsigned long end, int node, bool early)
 {
        unsigned long next;
-       pte_t *pte = kasan_pte_offset(pmd, addr, node, early);
+       pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
 
        do {
                phys_addr_t page_phys = early ? __pa_symbol(kasan_zero_page)
                                              : kasan_alloc_zeroed_page(node);
                next = addr + PAGE_SIZE;
-               set_pte(pte, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
-       } while (pte++, addr = next, addr != end && pte_none(*pte));
+               set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+       } while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep)));
 }
 
-static void __init kasan_pmd_populate(pud_t *pud, unsigned long addr,
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
                                      unsigned long end, int node, bool early)
 {
        unsigned long next;
-       pmd_t *pmd = kasan_pmd_offset(pud, addr, node, early);
+       pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
 
        do {
                next = pmd_addr_end(addr, end);
-               kasan_pte_populate(pmd, addr, next, node, early);
-       } while (pmd++, addr = next, addr != end && pmd_none(*pmd));
+               kasan_pte_populate(pmdp, addr, next, node, early);
+       } while (pmdp++, addr = next, addr != end && pmd_none(READ_ONCE(*pmdp)));
 }
 
-static void __init kasan_pud_populate(pgd_t *pgd, unsigned long addr,
+static void __init kasan_pud_populate(pgd_t *pgdp, unsigned long addr,
                                      unsigned long end, int node, bool early)
 {
        unsigned long next;
-       pud_t *pud = kasan_pud_offset(pgd, addr, node, early);
+       pud_t *pudp = kasan_pud_offset(pgdp, addr, node, early);
 
        do {
                next = pud_addr_end(addr, end);
-               kasan_pmd_populate(pud, addr, next, node, early);
-       } while (pud++, addr = next, addr != end && pud_none(*pud));
+               kasan_pmd_populate(pudp, addr, next, node, early);
+       } while (pudp++, addr = next, addr != end && pud_none(READ_ONCE(*pudp)));
 }
 
 static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
                                      int node, bool early)
 {
        unsigned long next;
-       pgd_t *pgd;
+       pgd_t *pgdp;
 
-       pgd = pgd_offset_k(addr);
+       pgdp = pgd_offset_k(addr);
        do {
                next = pgd_addr_end(addr, end);
-               kasan_pud_populate(pgd, addr, next, node, early);
-       } while (pgd++, addr = next, addr != end);
+               kasan_pud_populate(pgdp, addr, next, node, early);
+       } while (pgdp++, addr = next, addr != end);
 }
 
 /* The early shadow maps everything to a single page of zeroes */
@@ -155,14 +155,14 @@ static void __init kasan_map_populate(unsigned long start, unsigned long end,
  */
 void __init kasan_copy_shadow(pgd_t *pgdir)
 {
-       pgd_t *pgd, *pgd_new, *pgd_end;
+       pgd_t *pgdp, *pgdp_new, *pgdp_end;
 
-       pgd = pgd_offset_k(KASAN_SHADOW_START);
-       pgd_end = pgd_offset_k(KASAN_SHADOW_END);
-       pgd_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
+       pgdp = pgd_offset_k(KASAN_SHADOW_START);
+       pgdp_end = pgd_offset_k(KASAN_SHADOW_END);
+       pgdp_new = pgd_offset_raw(pgdir, KASAN_SHADOW_START);
        do {
-               set_pgd(pgd_new, *pgd);
-       } while (pgd++, pgd_new++, pgd != pgd_end);
+               set_pgd(pgdp_new, READ_ONCE(*pgdp));
+       } while (pgdp++, pgdp_new++, pgdp != pgdp_end);
 }
 
 static void __init clear_pgds(unsigned long start,
index 4694cda823c9541527b95f269658bdbc4b8243d7..3161b853f29e1d35a21b0da5a01f571c995616f3 100644 (file)
@@ -125,45 +125,48 @@ static bool pgattr_change_is_safe(u64 old, u64 new)
        return ((old ^ new) & ~mask) == 0;
 }
 
-static void init_pte(pmd_t *pmd, unsigned long addr, unsigned long end,
+static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
                     phys_addr_t phys, pgprot_t prot)
 {
-       pte_t *pte;
+       pte_t *ptep;
 
-       pte = pte_set_fixmap_offset(pmd, addr);
+       ptep = pte_set_fixmap_offset(pmdp, addr);
        do {
-               pte_t old_pte = *pte;
+               pte_t old_pte = READ_ONCE(*ptep);
 
-               set_pte(pte, pfn_pte(__phys_to_pfn(phys), prot));
+               set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
 
                /*
                 * After the PTE entry has been populated once, we
                 * only allow updates to the permission attributes.
                 */
-               BUG_ON(!pgattr_change_is_safe(pte_val(old_pte), pte_val(*pte)));
+               BUG_ON(!pgattr_change_is_safe(pte_val(old_pte),
+                                             READ_ONCE(pte_val(*ptep))));
 
                phys += PAGE_SIZE;
-       } while (pte++, addr += PAGE_SIZE, addr != end);
+       } while (ptep++, addr += PAGE_SIZE, addr != end);
 
        pte_clear_fixmap();
 }
 
-static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
+static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
                                unsigned long end, phys_addr_t phys,
                                pgprot_t prot,
                                phys_addr_t (*pgtable_alloc)(void),
                                int flags)
 {
        unsigned long next;
+       pmd_t pmd = READ_ONCE(*pmdp);
 
-       BUG_ON(pmd_sect(*pmd));
-       if (pmd_none(*pmd)) {
+       BUG_ON(pmd_sect(pmd));
+       if (pmd_none(pmd)) {
                phys_addr_t pte_phys;
                BUG_ON(!pgtable_alloc);
                pte_phys = pgtable_alloc();
-               __pmd_populate(pmd, pte_phys, PMD_TYPE_TABLE);
+               __pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
+               pmd = READ_ONCE(*pmdp);
        }
-       BUG_ON(pmd_bad(*pmd));
+       BUG_ON(pmd_bad(pmd));
 
        do {
                pgprot_t __prot = prot;
@@ -175,67 +178,69 @@ static void alloc_init_cont_pte(pmd_t *pmd, unsigned long addr,
                    (flags & NO_CONT_MAPPINGS) == 0)
                        __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
-               init_pte(pmd, addr, next, phys, __prot);
+               init_pte(pmdp, addr, next, phys, __prot);
 
                phys += next - addr;
        } while (addr = next, addr != end);
 }
 
-static void init_pmd(pud_t *pud, unsigned long addr, unsigned long end,
+static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
                     phys_addr_t phys, pgprot_t prot,
                     phys_addr_t (*pgtable_alloc)(void), int flags)
 {
        unsigned long next;
-       pmd_t *pmd;
+       pmd_t *pmdp;
 
-       pmd = pmd_set_fixmap_offset(pud, addr);
+       pmdp = pmd_set_fixmap_offset(pudp, addr);
        do {
-               pmd_t old_pmd = *pmd;
+               pmd_t old_pmd = READ_ONCE(*pmdp);
 
                next = pmd_addr_end(addr, end);
 
                /* try section mapping first */
                if (((addr | next | phys) & ~SECTION_MASK) == 0 &&
                    (flags & NO_BLOCK_MAPPINGS) == 0) {
-                       pmd_set_huge(pmd, phys, prot);
+                       pmd_set_huge(pmdp, phys, prot);
 
                        /*
                         * After the PMD entry has been populated once, we
                         * only allow updates to the permission attributes.
                         */
                        BUG_ON(!pgattr_change_is_safe(pmd_val(old_pmd),
-                                                     pmd_val(*pmd)));
+                                                     READ_ONCE(pmd_val(*pmdp))));
                } else {
-                       alloc_init_cont_pte(pmd, addr, next, phys, prot,
+                       alloc_init_cont_pte(pmdp, addr, next, phys, prot,
                                            pgtable_alloc, flags);
 
                        BUG_ON(pmd_val(old_pmd) != 0 &&
-                              pmd_val(old_pmd) != pmd_val(*pmd));
+                              pmd_val(old_pmd) != READ_ONCE(pmd_val(*pmdp)));
                }
                phys += next - addr;
-       } while (pmd++, addr = next, addr != end);
+       } while (pmdp++, addr = next, addr != end);
 
        pmd_clear_fixmap();
 }
 
-static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
+static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
                                unsigned long end, phys_addr_t phys,
                                pgprot_t prot,
                                phys_addr_t (*pgtable_alloc)(void), int flags)
 {
        unsigned long next;
+       pud_t pud = READ_ONCE(*pudp);
 
        /*
         * Check for initial section mappings in the pgd/pud.
         */
-       BUG_ON(pud_sect(*pud));
-       if (pud_none(*pud)) {
+       BUG_ON(pud_sect(pud));
+       if (pud_none(pud)) {
                phys_addr_t pmd_phys;
                BUG_ON(!pgtable_alloc);
                pmd_phys = pgtable_alloc();
-               __pud_populate(pud, pmd_phys, PUD_TYPE_TABLE);
+               __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
+               pud = READ_ONCE(*pudp);
        }
-       BUG_ON(pud_bad(*pud));
+       BUG_ON(pud_bad(pud));
 
        do {
                pgprot_t __prot = prot;
@@ -247,7 +252,7 @@ static void alloc_init_cont_pmd(pud_t *pud, unsigned long addr,
                    (flags & NO_CONT_MAPPINGS) == 0)
                        __prot = __pgprot(pgprot_val(prot) | PTE_CONT);
 
-               init_pmd(pud, addr, next, phys, __prot, pgtable_alloc, flags);
+               init_pmd(pudp, addr, next, phys, __prot, pgtable_alloc, flags);
 
                phys += next - addr;
        } while (addr = next, addr != end);
@@ -265,25 +270,27 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
        return true;
 }
 
-static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
-                                 phys_addr_t phys, pgprot_t prot,
-                                 phys_addr_t (*pgtable_alloc)(void),
-                                 int flags)
+static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
+                          phys_addr_t phys, pgprot_t prot,
+                          phys_addr_t (*pgtable_alloc)(void),
+                          int flags)
 {
-       pud_t *pud;
        unsigned long next;
+       pud_t *pudp;
+       pgd_t pgd = READ_ONCE(*pgdp);
 
-       if (pgd_none(*pgd)) {
+       if (pgd_none(pgd)) {
                phys_addr_t pud_phys;
                BUG_ON(!pgtable_alloc);
                pud_phys = pgtable_alloc();
-               __pgd_populate(pgd, pud_phys, PUD_TYPE_TABLE);
+               __pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
+               pgd = READ_ONCE(*pgdp);
        }
-       BUG_ON(pgd_bad(*pgd));
+       BUG_ON(pgd_bad(pgd));
 
-       pud = pud_set_fixmap_offset(pgd, addr);
+       pudp = pud_set_fixmap_offset(pgdp, addr);
        do {
-               pud_t old_pud = *pud;
+               pud_t old_pud = READ_ONCE(*pudp);
 
                next = pud_addr_end(addr, end);
 
@@ -292,23 +299,23 @@ static void alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end,
                 */
                if (use_1G_block(addr, next, phys) &&
                    (flags & NO_BLOCK_MAPPINGS) == 0) {
-                       pud_set_huge(pud, phys, prot);
+                       pud_set_huge(pudp, phys, prot);
 
                        /*
                         * After the PUD entry has been populated once, we
                         * only allow updates to the permission attributes.
                         */
                        BUG_ON(!pgattr_change_is_safe(pud_val(old_pud),
-                                                     pud_val(*pud)));
+                                                     READ_ONCE(pud_val(*pudp))));
                } else {
-                       alloc_init_cont_pmd(pud, addr, next, phys, prot,
+                       alloc_init_cont_pmd(pudp, addr, next, phys, prot,
                                            pgtable_alloc, flags);
 
                        BUG_ON(pud_val(old_pud) != 0 &&
-                              pud_val(old_pud) != pud_val(*pud));
+                              pud_val(old_pud) != READ_ONCE(pud_val(*pudp)));
                }
                phys += next - addr;
-       } while (pud++, addr = next, addr != end);
+       } while (pudp++, addr = next, addr != end);
 
        pud_clear_fixmap();
 }
@@ -320,7 +327,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
                                 int flags)
 {
        unsigned long addr, length, end, next;
-       pgd_t *pgd = pgd_offset_raw(pgdir, virt);
+       pgd_t *pgdp = pgd_offset_raw(pgdir, virt);
 
        /*
         * If the virtual and physical address don't have the same offset
@@ -336,10 +343,10 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
        end = addr + length;
        do {
                next = pgd_addr_end(addr, end);
-               alloc_init_pud(pgd, addr, next, phys, prot, pgtable_alloc,
+               alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc,
                               flags);
                phys += next - addr;
-       } while (pgd++, addr = next, addr != end);
+       } while (pgdp++, addr = next, addr != end);
 }
 
 static phys_addr_t pgd_pgtable_alloc(void)
@@ -401,10 +408,10 @@ static void update_mapping_prot(phys_addr_t phys, unsigned long virt,
        flush_tlb_kernel_range(virt, virt + size);
 }
 
-static void __init __map_memblock(pgd_t *pgd, phys_addr_t start,
+static void __init __map_memblock(pgd_t *pgdp, phys_addr_t start,
                                  phys_addr_t end, pgprot_t prot, int flags)
 {
-       __create_pgd_mapping(pgd, start, __phys_to_virt(start), end - start,
+       __create_pgd_mapping(pgdp, start, __phys_to_virt(start), end - start,
                             prot, early_pgtable_alloc, flags);
 }
 
@@ -418,7 +425,7 @@ void __init mark_linear_text_alias_ro(void)
                            PAGE_KERNEL_RO);
 }
 
-static void __init map_mem(pgd_t *pgd)
+static void __init map_mem(pgd_t *pgdp)
 {
        phys_addr_t kernel_start = __pa_symbol(_text);
        phys_addr_t kernel_end = __pa_symbol(__init_begin);
@@ -451,7 +458,7 @@ static void __init map_mem(pgd_t *pgd)
                if (memblock_is_nomap(reg))
                        continue;
 
-               __map_memblock(pgd, start, end, PAGE_KERNEL, flags);
+               __map_memblock(pgdp, start, end, PAGE_KERNEL, flags);
        }
 
        /*
@@ -464,7 +471,7 @@ static void __init map_mem(pgd_t *pgd)
         * Note that contiguous mappings cannot be remapped in this way,
         * so we should avoid them here.
         */
-       __map_memblock(pgd, kernel_start, kernel_end,
+       __map_memblock(pgdp, kernel_start, kernel_end,
                       PAGE_KERNEL, NO_CONT_MAPPINGS);
        memblock_clear_nomap(kernel_start, kernel_end - kernel_start);
 
@@ -475,7 +482,7 @@ static void __init map_mem(pgd_t *pgd)
         * through /sys/kernel/kexec_crash_size interface.
         */
        if (crashk_res.end) {
-               __map_memblock(pgd, crashk_res.start, crashk_res.end + 1,
+               __map_memblock(pgdp, crashk_res.start, crashk_res.end + 1,
                               PAGE_KERNEL,
                               NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS);
                memblock_clear_nomap(crashk_res.start,
@@ -499,7 +506,7 @@ void mark_rodata_ro(void)
        debug_checkwx();
 }
 
-static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
+static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
                                      pgprot_t prot, struct vm_struct *vma,
                                      int flags, unsigned long vm_flags)
 {
@@ -509,7 +516,7 @@ static void __init map_kernel_segment(pgd_t *pgd, void *va_start, void *va_end,
        BUG_ON(!PAGE_ALIGNED(pa_start));
        BUG_ON(!PAGE_ALIGNED(size));
 
-       __create_pgd_mapping(pgd, pa_start, (unsigned long)va_start, size, prot,
+       __create_pgd_mapping(pgdp, pa_start, (unsigned long)va_start, size, prot,
                             early_pgtable_alloc, flags);
 
        if (!(vm_flags & VM_NO_GUARD))
@@ -562,7 +569,7 @@ core_initcall(map_entry_trampoline);
 /*
  * Create fine-grained mappings for the kernel.
  */
-static void __init map_kernel(pgd_t *pgd)
+static void __init map_kernel(pgd_t *pgdp)
 {
        static struct vm_struct vmlinux_text, vmlinux_rodata, vmlinux_inittext,
                                vmlinux_initdata, vmlinux_data;
@@ -578,24 +585,24 @@ static void __init map_kernel(pgd_t *pgd)
         * Only rodata will be remapped with different permissions later on,
         * all other segments are allowed to use contiguous mappings.
         */
-       map_kernel_segment(pgd, _text, _etext, text_prot, &vmlinux_text, 0,
+       map_kernel_segment(pgdp, _text, _etext, text_prot, &vmlinux_text, 0,
                           VM_NO_GUARD);
-       map_kernel_segment(pgd, __start_rodata, __inittext_begin, PAGE_KERNEL,
+       map_kernel_segment(pgdp, __start_rodata, __inittext_begin, PAGE_KERNEL,
                           &vmlinux_rodata, NO_CONT_MAPPINGS, VM_NO_GUARD);
-       map_kernel_segment(pgd, __inittext_begin, __inittext_end, text_prot,
+       map_kernel_segment(pgdp, __inittext_begin, __inittext_end, text_prot,
                           &vmlinux_inittext, 0, VM_NO_GUARD);
-       map_kernel_segment(pgd, __initdata_begin, __initdata_end, PAGE_KERNEL,
+       map_kernel_segment(pgdp, __initdata_begin, __initdata_end, PAGE_KERNEL,
                           &vmlinux_initdata, 0, VM_NO_GUARD);
-       map_kernel_segment(pgd, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
+       map_kernel_segment(pgdp, _data, _end, PAGE_KERNEL, &vmlinux_data, 0, 0);
 
-       if (!pgd_val(*pgd_offset_raw(pgd, FIXADDR_START))) {
+       if (!READ_ONCE(pgd_val(*pgd_offset_raw(pgdp, FIXADDR_START)))) {
                /*
                 * The fixmap falls in a separate pgd to the kernel, and doesn't
                 * live in the carveout for the swapper_pg_dir. We can simply
                 * re-use the existing dir for the fixmap.
                 */
-               set_pgd(pgd_offset_raw(pgd, FIXADDR_START),
-                       *pgd_offset_k(FIXADDR_START));
+               set_pgd(pgd_offset_raw(pgdp, FIXADDR_START),
+                       READ_ONCE(*pgd_offset_k(FIXADDR_START)));
        } else if (CONFIG_PGTABLE_LEVELS > 3) {
                /*
                 * The fixmap shares its top level pgd entry with the kernel
@@ -604,14 +611,15 @@ static void __init map_kernel(pgd_t *pgd)
                 * entry instead.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-               pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START),
+               pud_populate(&init_mm,
+                            pud_set_fixmap_offset(pgdp, FIXADDR_START),
                             lm_alias(bm_pmd));
                pud_clear_fixmap();
        } else {
                BUG();
        }
 
-       kasan_copy_shadow(pgd);
+       kasan_copy_shadow(pgdp);
 }
 
 /*
@@ -621,10 +629,10 @@ static void __init map_kernel(pgd_t *pgd)
 void __init paging_init(void)
 {
        phys_addr_t pgd_phys = early_pgtable_alloc();
-       pgd_t *pgd = pgd_set_fixmap(pgd_phys);
+       pgd_t *pgdp = pgd_set_fixmap(pgd_phys);
 
-       map_kernel(pgd);
-       map_mem(pgd);
+       map_kernel(pgdp);
+       map_mem(pgdp);
 
        /*
         * We want to reuse the original swapper_pg_dir so we don't have to
@@ -635,7 +643,7 @@ void __init paging_init(void)
         * To do this we need to go via a temporary pgd.
         */
        cpu_replace_ttbr1(__va(pgd_phys));
-       memcpy(swapper_pg_dir, pgd, PGD_SIZE);
+       memcpy(swapper_pg_dir, pgdp, PGD_SIZE);
        cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
 
        pgd_clear_fixmap();
@@ -655,37 +663,40 @@ void __init paging_init(void)
  */
 int kern_addr_valid(unsigned long addr)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgdp;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
+       pte_t *ptep, pte;
 
        if ((((long)addr) >> VA_BITS) != -1UL)
                return 0;
 
-       pgd = pgd_offset_k(addr);
-       if (pgd_none(*pgd))
+       pgdp = pgd_offset_k(addr);
+       if (pgd_none(READ_ONCE(*pgdp)))
                return 0;
 
-       pud = pud_offset(pgd, addr);
-       if (pud_none(*pud))
+       pudp = pud_offset(pgdp, addr);
+       pud = READ_ONCE(*pudp);
+       if (pud_none(pud))
                return 0;
 
-       if (pud_sect(*pud))
-               return pfn_valid(pud_pfn(*pud));
+       if (pud_sect(pud))
+               return pfn_valid(pud_pfn(pud));
 
-       pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd))
+       pmdp = pmd_offset(pudp, addr);
+       pmd = READ_ONCE(*pmdp);
+       if (pmd_none(pmd))
                return 0;
 
-       if (pmd_sect(*pmd))
-               return pfn_valid(pmd_pfn(*pmd));
+       if (pmd_sect(pmd))
+               return pfn_valid(pmd_pfn(pmd));
 
-       pte = pte_offset_kernel(pmd, addr);
-       if (pte_none(*pte))
+       ptep = pte_offset_kernel(pmdp, addr);
+       pte = READ_ONCE(*ptep);
+       if (pte_none(pte))
                return 0;
 
-       return pfn_valid(pte_pfn(*pte));
+       return pfn_valid(pte_pfn(pte));
 }
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 #if !ARM64_SWAPPER_USES_SECTION_MAPS
@@ -700,32 +711,32 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 {
        unsigned long addr = start;
        unsigned long next;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
 
        do {
                next = pmd_addr_end(addr, end);
 
-               pgd = vmemmap_pgd_populate(addr, node);
-               if (!pgd)
+               pgdp = vmemmap_pgd_populate(addr, node);
+               if (!pgdp)
                        return -ENOMEM;
 
-               pud = vmemmap_pud_populate(pgd, addr, node);
-               if (!pud)
+               pudp = vmemmap_pud_populate(pgdp, addr, node);
+               if (!pudp)
                        return -ENOMEM;
 
-               pmd = pmd_offset(pud, addr);
-               if (pmd_none(*pmd)) {
+               pmdp = pmd_offset(pudp, addr);
+               if (pmd_none(READ_ONCE(*pmdp))) {
                        void *p = NULL;
 
                        p = vmemmap_alloc_block_buf(PMD_SIZE, node);
                        if (!p)
                                return -ENOMEM;
 
-                       pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL));
+                       pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
                } else
-                       vmemmap_verify((pte_t *)pmd, node, addr, next);
+                       vmemmap_verify((pte_t *)pmdp, node, addr, next);
        } while (addr = next, addr != end);
 
        return 0;
@@ -739,20 +750,22 @@ void vmemmap_free(unsigned long start, unsigned long end,
 
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
-       pgd_t *pgd = pgd_offset_k(addr);
+       pgd_t *pgdp = pgd_offset_k(addr);
+       pgd_t pgd = READ_ONCE(*pgdp);
 
-       BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
+       BUG_ON(pgd_none(pgd) || pgd_bad(pgd));
 
-       return pud_offset_kimg(pgd, addr);
+       return pud_offset_kimg(pgdp, addr);
 }
 
 static inline pmd_t * fixmap_pmd(unsigned long addr)
 {
-       pud_t *pud = fixmap_pud(addr);
+       pud_t *pudp = fixmap_pud(addr);
+       pud_t pud = READ_ONCE(*pudp);
 
-       BUG_ON(pud_none(*pud) || pud_bad(*pud));
+       BUG_ON(pud_none(pud) || pud_bad(pud));
 
-       return pmd_offset_kimg(pud, addr);
+       return pmd_offset_kimg(pudp, addr);
 }
 
 static inline pte_t * fixmap_pte(unsigned long addr)
@@ -768,30 +781,31 @@ static inline pte_t * fixmap_pte(unsigned long addr)
  */
 void __init early_fixmap_init(void)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
+       pgd_t *pgdp, pgd;
+       pud_t *pudp;
+       pmd_t *pmdp;
        unsigned long addr = FIXADDR_START;
 
-       pgd = pgd_offset_k(addr);
+       pgdp = pgd_offset_k(addr);
+       pgd = READ_ONCE(*pgdp);
        if (CONFIG_PGTABLE_LEVELS > 3 &&
-           !(pgd_none(*pgd) || pgd_page_paddr(*pgd) == __pa_symbol(bm_pud))) {
+           !(pgd_none(pgd) || pgd_page_paddr(pgd) == __pa_symbol(bm_pud))) {
                /*
                 * We only end up here if the kernel mapping and the fixmap
                 * share the top level pgd entry, which should only happen on
                 * 16k/4 levels configurations.
                 */
                BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
-               pud = pud_offset_kimg(pgd, addr);
+               pudp = pud_offset_kimg(pgdp, addr);
        } else {
-               if (pgd_none(*pgd))
-                       __pgd_populate(pgd, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
-               pud = fixmap_pud(addr);
+               if (pgd_none(pgd))
+                       __pgd_populate(pgdp, __pa_symbol(bm_pud), PUD_TYPE_TABLE);
+               pudp = fixmap_pud(addr);
        }
-       if (pud_none(*pud))
-               __pud_populate(pud, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
-       pmd = fixmap_pmd(addr);
-       __pmd_populate(pmd, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
+       if (pud_none(READ_ONCE(*pudp)))
+               __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE);
+       pmdp = fixmap_pmd(addr);
+       __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
 
        /*
         * The boot-ioremap range spans multiple pmds, for which
@@ -800,11 +814,11 @@ void __init early_fixmap_init(void)
        BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
                     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
 
-       if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
-            || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
+       if ((pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
+            || pmdp != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
                WARN_ON(1);
-               pr_warn("pmd %p != %p, %p\n",
-                       pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
+               pr_warn("pmdp %p != %p, %p\n",
+                       pmdp, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)),
                        fixmap_pmd(fix_to_virt(FIX_BTMAP_END)));
                pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
                        fix_to_virt(FIX_BTMAP_BEGIN));
@@ -824,16 +838,16 @@ void __set_fixmap(enum fixed_addresses idx,
                               phys_addr_t phys, pgprot_t flags)
 {
        unsigned long addr = __fix_to_virt(idx);
-       pte_t *pte;
+       pte_t *ptep;
 
        BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
-       pte = fixmap_pte(addr);
+       ptep = fixmap_pte(addr);
 
        if (pgprot_val(flags)) {
-               set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
+               set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, flags));
        } else {
-               pte_clear(&init_mm, addr, pte);
+               pte_clear(&init_mm, addr, ptep);
                flush_tlb_kernel_range(addr, addr+PAGE_SIZE);
        }
 }
@@ -915,36 +929,36 @@ int __init arch_ioremap_pmd_supported(void)
        return 1;
 }
 
-int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
 {
        pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT |
                                        pgprot_val(mk_sect_prot(prot)));
        BUG_ON(phys & ~PUD_MASK);
-       set_pud(pud, pfn_pud(__phys_to_pfn(phys), sect_prot));
+       set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot));
        return 1;
 }
 
-int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot)
 {
        pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT |
                                        pgprot_val(mk_sect_prot(prot)));
        BUG_ON(phys & ~PMD_MASK);
-       set_pmd(pmd, pfn_pmd(__phys_to_pfn(phys), sect_prot));
+       set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot));
        return 1;
 }
 
-int pud_clear_huge(pud_t *pud)
+int pud_clear_huge(pud_t *pudp)
 {
-       if (!pud_sect(*pud))
+       if (!pud_sect(READ_ONCE(*pudp)))
                return 0;
-       pud_clear(pud);
+       pud_clear(pudp);
        return 1;
 }
 
-int pmd_clear_huge(pmd_t *pmd)
+int pmd_clear_huge(pmd_t *pmdp)
 {
-       if (!pmd_sect(*pmd))
+       if (!pmd_sect(READ_ONCE(*pmdp)))
                return 0;
-       pmd_clear(pmd);
+       pmd_clear(pmdp);
        return 1;
 }
index a682a0a2a0fa4d5db9175256e78100b1afde344a..a56359373d8b3592e6cde6891d9b44206bd96137 100644 (file)
@@ -29,7 +29,7 @@ static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
                        void *data)
 {
        struct page_change_data *cdata = data;
-       pte_t pte = *ptep;
+       pte_t pte = READ_ONCE(*ptep);
 
        pte = clear_pte_bit(pte, cdata->clear_mask);
        pte = set_pte_bit(pte, cdata->set_mask);
@@ -156,30 +156,32 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
  */
 bool kernel_page_present(struct page *page)
 {
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
+       pgd_t *pgdp;
+       pud_t *pudp, pud;
+       pmd_t *pmdp, pmd;
+       pte_t *ptep;
        unsigned long addr = (unsigned long)page_address(page);
 
-       pgd = pgd_offset_k(addr);
-       if (pgd_none(*pgd))
+       pgdp = pgd_offset_k(addr);
+       if (pgd_none(READ_ONCE(*pgdp)))
                return false;
 
-       pud = pud_offset(pgd, addr);
-       if (pud_none(*pud))
+       pudp = pud_offset(pgdp, addr);
+       pud = READ_ONCE(*pudp);
+       if (pud_none(pud))
                return false;
-       if (pud_sect(*pud))
+       if (pud_sect(pud))
                return true;
 
-       pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd))
+       pmdp = pmd_offset(pudp, addr);
+       pmd = READ_ONCE(*pmdp);
+       if (pmd_none(pmd))
                return false;
-       if (pmd_sect(*pmd))
+       if (pmd_sect(pmd))
                return true;
 
-       pte = pte_offset_kernel(pmd, addr);
-       return pte_valid(*pte);
+       ptep = pte_offset_kernel(pmdp, addr);
+       return pte_valid(READ_ONCE(*ptep));
 }
 #endif /* CONFIG_HIBERNATION */
 #endif /* CONFIG_DEBUG_PAGEALLOC */
index 71baed7e592a499196a1c7bc239dbbc3297d3bc2..c0af4761729986da832a8e844b256e64da8cb4c1 100644 (file)
@@ -205,7 +205,8 @@ ENDPROC(idmap_cpu_replace_ttbr1)
        dc      cvac, cur_\()\type\()p          // Ensure any existing dirty
        dmb     sy                              // lines are written back before
        ldr     \type, [cur_\()\type\()p]       // loading the entry
-       tbz     \type, #0, next_\()\type        // Skip invalid entries
+       tbz     \type, #0, skip_\()\type        // Skip invalid and
+       tbnz    \type, #11, skip_\()\type       // non-global entries
        .endm
 
        .macro __idmap_kpti_put_pgtable_ent_ng, type
@@ -265,8 +266,9 @@ ENTRY(idmap_kpti_install_ng_mappings)
        add     end_pgdp, cur_pgdp, #(PTRS_PER_PGD * 8)
 do_pgd:        __idmap_kpti_get_pgtable_ent    pgd
        tbnz    pgd, #1, walk_puds
-       __idmap_kpti_put_pgtable_ent_ng pgd
 next_pgd:
+       __idmap_kpti_put_pgtable_ent_ng pgd
+skip_pgd:
        add     cur_pgdp, cur_pgdp, #8
        cmp     cur_pgdp, end_pgdp
        b.ne    do_pgd
@@ -294,8 +296,9 @@ walk_puds:
        add     end_pudp, cur_pudp, #(PTRS_PER_PUD * 8)
 do_pud:        __idmap_kpti_get_pgtable_ent    pud
        tbnz    pud, #1, walk_pmds
-       __idmap_kpti_put_pgtable_ent_ng pud
 next_pud:
+       __idmap_kpti_put_pgtable_ent_ng pud
+skip_pud:
        add     cur_pudp, cur_pudp, 8
        cmp     cur_pudp, end_pudp
        b.ne    do_pud
@@ -314,8 +317,9 @@ walk_pmds:
        add     end_pmdp, cur_pmdp, #(PTRS_PER_PMD * 8)
 do_pmd:        __idmap_kpti_get_pgtable_ent    pmd
        tbnz    pmd, #1, walk_ptes
-       __idmap_kpti_put_pgtable_ent_ng pmd
 next_pmd:
+       __idmap_kpti_put_pgtable_ent_ng pmd
+skip_pmd:
        add     cur_pmdp, cur_pmdp, #8
        cmp     cur_pmdp, end_pmdp
        b.ne    do_pmd
@@ -333,7 +337,7 @@ walk_ptes:
        add     end_ptep, cur_ptep, #(PTRS_PER_PTE * 8)
 do_pte:        __idmap_kpti_get_pgtable_ent    pte
        __idmap_kpti_put_pgtable_ent_ng pte
-next_pte:
+skip_pte:
        add     cur_ptep, cur_ptep, #8
        cmp     cur_ptep, end_ptep
        b.ne    do_pte
index 0b4c65a1af25fbe264766ee1e14c4c0bf19bc5e6..498f3da3f225d2ed8479af540bbbc22786e2c55d 100644 (file)
@@ -41,7 +41,6 @@ ifneq ($(CONFIG_IA64_ESI),)
 obj-y                          += esi_stub.o   # must be in kernel proper
 endif
 obj-$(CONFIG_INTEL_IOMMU)      += pci-dma.o
-obj-$(CONFIG_SWIOTLB)          += pci-swiotlb.o
 
 obj-$(CONFIG_BINFMT_ELF)       += elfcore.o
 
index 19c88d770054617bfc23b15c81c20b12a7f7cc3f..fcf9af492d60229a491337e56b1ebc9adf60f7fb 100644 (file)
@@ -10,6 +10,8 @@
 
 #include <linux/errno.h>
 #include <linux/percpu.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
 #include <linux/spinlock.h>
 
 #include <asm/mips-cps.h>
@@ -22,6 +24,17 @@ static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags);
 
 phys_addr_t __weak mips_cpc_default_phys_base(void)
 {
+       struct device_node *cpc_node;
+       struct resource res;
+       int err;
+
+       cpc_node = of_find_compatible_node(of_root, NULL, "mti,mips-cpc");
+       if (cpc_node) {
+               err = of_address_to_resource(cpc_node, 0, &res);
+               if (!err)
+                       return res.start;
+       }
+
        return 0;
 }
 
index 85bc601e9a0d43ffd89669eac4b359de99566083..5f8b0a9e30b3d6faec9befca1e759a8f9263f8c8 100644 (file)
@@ -375,6 +375,7 @@ static void __init bootmem_init(void)
        unsigned long reserved_end;
        unsigned long mapstart = ~0UL;
        unsigned long bootmap_size;
+       phys_addr_t ramstart = (phys_addr_t)ULLONG_MAX;
        bool bootmap_valid = false;
        int i;
 
@@ -395,7 +396,8 @@ static void __init bootmem_init(void)
        max_low_pfn = 0;
 
        /*
-        * Find the highest page frame number we have available.
+        * Find the highest page frame number we have available
+        * and the lowest used RAM address
         */
        for (i = 0; i < boot_mem_map.nr_map; i++) {
                unsigned long start, end;
@@ -407,6 +409,8 @@ static void __init bootmem_init(void)
                end = PFN_DOWN(boot_mem_map.map[i].addr
                                + boot_mem_map.map[i].size);
 
+               ramstart = min(ramstart, boot_mem_map.map[i].addr);
+
 #ifndef CONFIG_HIGHMEM
                /*
                 * Skip highmem here so we get an accurate max_low_pfn if low
@@ -436,6 +440,13 @@ static void __init bootmem_init(void)
                mapstart = max(reserved_end, start);
        }
 
+       /*
+        * Reserve any memory between the start of RAM and PHYS_OFFSET
+        */
+       if (ramstart > PHYS_OFFSET)
+               add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
+                                 BOOT_MEM_RESERVED);
+
        if (min_low_pfn >= max_low_pfn)
                panic("Incorrect memory mapping !!!");
        if (min_low_pfn > ARCH_PFN_OFFSET) {
@@ -664,9 +675,6 @@ static int __init early_parse_mem(char *p)
 
        add_memory_region(start, size, BOOT_MEM_RAM);
 
-       if (start && start > PHYS_OFFSET)
-               add_memory_region(PHYS_OFFSET, start - PHYS_OFFSET,
-                               BOOT_MEM_RESERVED);
        return 0;
 }
 early_param("mem", early_parse_mem);
index 87dcac2447c8df20a572139d5053624e91acf2ca..9d41732a9146a31545b9114812cb12c669196478 100644 (file)
@@ -572,7 +572,7 @@ asmlinkage void __weak plat_wired_tlb_setup(void)
         */
 }
 
-void __init bmips_cpu_setup(void)
+void bmips_cpu_setup(void)
 {
        void __iomem __maybe_unused *cbr = BMIPS_GET_CBR();
        u32 __maybe_unused cfg;
index 30a155c0a6b07e31ca69d5a7418fb19a1f6e7872..c615abdce119ea34ff6c33d02109cd700036db64 100644 (file)
@@ -16,6 +16,7 @@
 #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEX        PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX        PUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
index 949d691094a46d674880dd1e54da971a4161f815..67c5475311ee6e03b29486f8518dc74758263224 100644 (file)
@@ -63,7 +63,8 @@ static inline int hash__hugepd_ok(hugepd_t hpd)
  * keeping the prototype consistent across the two formats.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-                       unsigned int subpg_index, unsigned long hidx)
+                                        unsigned int subpg_index, unsigned long hidx,
+                                        int offset)
 {
        return (hidx << H_PAGE_F_GIX_SHIFT) &
                (H_PAGE_F_SECOND | H_PAGE_F_GIX);
index 338b7da468cef309fa2b787c852e96ab05f014e5..3bcf269f8f55470097ac56680685321bf13e62ba 100644 (file)
@@ -45,7 +45,7 @@
  * generic accessors and iterators here
  */
 #define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
 {
        real_pte_t rpte;
        unsigned long *hidxp;
@@ -59,7 +59,7 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
         */
        smp_rmb();
 
-       hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+       hidxp = (unsigned long *)(ptep + offset);
        rpte.hidx = *hidxp;
        return rpte;
 }
@@ -86,9 +86,10 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
  * expected to modify the PTE bits accordingly and commit the PTE to memory.
  */
 static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
-               unsigned int subpg_index, unsigned long hidx)
+                                        unsigned int subpg_index,
+                                        unsigned long hidx, int offset)
 {
-       unsigned long *hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+       unsigned long *hidxp = (unsigned long *)(ptep + offset);
 
        rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
        *hidxp = rpte.hidx  | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
@@ -140,13 +141,18 @@ static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long a
 }
 
 #define H_PTE_TABLE_SIZE       PTE_FRAG_SIZE
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
 #define H_PMD_TABLE_SIZE       ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
                                 (sizeof(unsigned long) << PMD_INDEX_SIZE))
 #else
 #define H_PMD_TABLE_SIZE       (sizeof(pmd_t) << PMD_INDEX_SIZE)
 #endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE       ((sizeof(pud_t) << PUD_INDEX_SIZE) +    \
+                                (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
 #define H_PUD_TABLE_SIZE       (sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
 #define H_PGD_TABLE_SIZE       (sizeof(pgd_t) << PGD_INDEX_SIZE)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
index 0920eff731b385221edeb46a04ed5f280ad76ff8..935adcd92a81655ed79e9c21fb6e196233ca69b1 100644 (file)
@@ -23,7 +23,8 @@
                                 H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
 #define H_PGTABLE_RANGE                (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)) && \
+       defined(CONFIG_PPC_64K_PAGES)
 /*
  * only with hash 64k we need to use the second half of pmd page table
  * to store pointer to deposited pgtable_t
 #else
 #define H_PMD_CACHE_INDEX      H_PMD_INDEX_SIZE
 #endif
+/*
+ * We store the slot details in the second half of page table.
+ * Increase the pud level table so that hugetlb ptes can be stored
+ * at pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) &&  defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX      (H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX      (H_PUD_INDEX_SIZE)
+#endif
 /*
  * Define the address range of the kernel non-linear virtual area
  */
index 1fcfa425cefaf205fe787cc9a480265aec758e0f..4746bc68d446d8e95427e67a86c3493a1d1f1668 100644 (file)
@@ -73,10 +73,16 @@ static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
+       pgd_t *pgd;
+
        if (radix_enabled())
                return radix__pgd_alloc(mm);
-       return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
-               pgtable_gfp_flags(mm, GFP_KERNEL));
+
+       pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+                              pgtable_gfp_flags(mm, GFP_KERNEL));
+       memset(pgd, 0, PGD_TABLE_SIZE);
+
+       return pgd;
 }
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -93,13 +99,13 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-       return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+       return kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
                pgtable_gfp_flags(mm, GFP_KERNEL));
 }
 
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-       kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+       kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
@@ -115,7 +121,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
         * ahead and flush the page walk cache
         */
        flush_tlb_pgtable(tlb, address);
-        pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE);
+       pgtable_free_tlb(tlb, pud, PUD_CACHE_INDEX);
 }
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
index 51017726d49539fda8cba5b346460aeba104c3f9..a6b9f1d746002cd3479686603c76322d52676db9 100644 (file)
@@ -232,11 +232,13 @@ extern unsigned long __pmd_index_size;
 extern unsigned long __pud_index_size;
 extern unsigned long __pgd_index_size;
 extern unsigned long __pmd_cache_index;
+extern unsigned long __pud_cache_index;
 #define PTE_INDEX_SIZE  __pte_index_size
 #define PMD_INDEX_SIZE  __pmd_index_size
 #define PUD_INDEX_SIZE  __pud_index_size
 #define PGD_INDEX_SIZE  __pgd_index_size
 #define PMD_CACHE_INDEX __pmd_cache_index
+#define PUD_CACHE_INDEX __pud_cache_index
 /*
  * Because of use of pte fragments and THP, size of page table
  * are not always derived out of index size above.
@@ -348,7 +350,7 @@ extern unsigned long pci_io_base;
  */
 #ifndef __real_pte
 
-#define __real_pte(e,p)                ((real_pte_t){(e)})
+#define __real_pte(e, p, o)            ((real_pte_t){(e)})
 #define __rpte_to_pte(r)       ((r).pte)
 #define __rpte_to_hidx(r,index)        (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
 
index 176dfb73d42c073df181013c6497cb42f9c8f85d..471b2274fbeba815f04c1957d975f0f1a74bcdbe 100644 (file)
@@ -645,7 +645,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
                                          EXC_HV, SOFTEN_TEST_HV, bitmask)
 
 #define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label, bitmask)           \
-       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec, bitmask);\
+       MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
        EXCEPTION_RELON_PROLOG_PSERIES_1(label, EXC_HV)
 
 /*
index 88e5e8f17e9896e5a051845235bfa0c822684552..855e17d158b11f04120b9b39a352af91cedb95c8 100644 (file)
 #define PACA_IRQ_HMI           0x20
 #define PACA_IRQ_PMI           0x40
 
+/*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK        (PACA_IRQ_EE|PACA_IRQ_PMI)
+#else
+#define PACA_IRQ_MUST_HARD_MASK        (PACA_IRQ_EE)
+#endif
+
 /*
  * flags for paca->irq_soft_mask
  */
@@ -244,7 +254,7 @@ static inline bool lazy_irq_pending(void)
 static inline void may_hard_irq_enable(void)
 {
        get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
-       if (!(get_paca()->irq_happened & PACA_IRQ_EE))
+       if (!(get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK))
                __hard_irq_enable();
 }
 
index 9dcbfa6bbb91e740e483fa6c5c56b8422b8440c8..d8b1e8e7e035b31acd7372eee6bd8017166987d4 100644 (file)
@@ -140,6 +140,12 @@ static inline bool kdump_in_progress(void)
        return false;
 }
 
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
 #endif /* CONFIG_KEXEC_CORE */
 #endif /* ! __ASSEMBLY__ */
 #endif /* __KERNEL__ */
index 504a3c36ce5c9b311a9c8864d112792ae60fab9f..03bbd1149530d3115d7c9e84c66893428fe4af43 100644 (file)
@@ -24,6 +24,7 @@ extern int icache_44x_need_flush;
 #define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
 
 #define PMD_CACHE_INDEX        PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX        PUD_INDEX_SIZE
 
 #ifndef __ASSEMBLY__
 #define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
index abddf5830ad5550ee2c72875b209d9546afec579..5c5f75d005ada6289633455dc89bc0d2641c2e54 100644 (file)
@@ -27,6 +27,7 @@
 #else
 #define PMD_CACHE_INDEX        PMD_INDEX_SIZE
 #endif
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
 
 /*
  * Define the address range of the kernel non-linear virtual area
index 88187c285c70d5d823ccdf0a572cb8a7e24a6341..9f421641a35c8240cbacf192f6a1b22b4f33c63c 100644 (file)
@@ -44,6 +44,11 @@ extern int sysfs_add_device_to_node(struct device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
 extern int numa_update_cpu_topology(bool cpus_locked);
 
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+       numa_cpu_lookup_table[cpu] = node;
+}
+
 static inline int early_cpu_to_node(int cpu)
 {
        int nid;
@@ -76,12 +81,16 @@ static inline int numa_update_cpu_topology(bool cpus_locked)
 {
        return 0;
 }
+
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
 extern int start_topology_update(void);
 extern int stop_topology_update(void);
 extern int prrn_is_enabled(void);
+extern int find_and_online_cpu_nid(int cpu);
 #else
 static inline int start_topology_update(void)
 {
@@ -95,6 +104,10 @@ static inline int prrn_is_enabled(void)
 {
        return 0;
 }
+static inline int find_and_online_cpu_nid(int cpu)
+{
+       return 0;
+}
 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
 
 #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES)
index ee832d344a5a265018d8fa98f0c38c0a94549db5..9b6e653e501a1264a8bfa398400dd88ecfa38d25 100644 (file)
@@ -943,6 +943,8 @@ kernel_dbg_exc:
 /*
  * An interrupt came in while soft-disabled; We mark paca->irq_happened
  * accordingly and if the interrupt is level sensitive, we hard disable
+ * (full_mask). The full_mask case corresponds to PACA_IRQ_MUST_HARD_MASK,
+ * so keep the two in sync.
  */
 
 .macro masked_interrupt_book3e paca_irq full_mask
index 243d072a225aac1f7c7eaa69b6e5ef8cd21ce2c6..3ac87e53b3da0fdc0c41bd967ac731f5d98d6efb 100644 (file)
@@ -1426,7 +1426,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
  *   triggered and won't automatically refire.
  * - If it was a HMI we return immediately since we handled it in realmode
  *   and it won't refire.
- * - else we hard disable and return.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
  * This is called with r10 containing the value to OR to the paca field.
  */
 #define MASKED_INTERRUPT(_H)                           \
@@ -1441,8 +1441,8 @@ masked_##_H##interrupt:                                   \
        ori     r10,r10,0xffff;                         \
        mtspr   SPRN_DEC,r10;                           \
        b       MASKED_DEC_HANDLER_LABEL;               \
-1:     andi.   r10,r10,(PACA_IRQ_DBELL|PACA_IRQ_HMI);  \
-       bne     2f;                                     \
+1:     andi.   r10,r10,PACA_IRQ_MUST_HARD_MASK;        \
+       beq     2f;                                     \
        mfspr   r10,SPRN_##_H##SRR1;                    \
        xori    r10,r10,MSR_EE; /* clear MSR_EE */      \
        mtspr   SPRN_##_H##SRR1,r10;                    \
index 5a8bfee6e1877c58ae607445ea77af1ed6b2e869..04d0bbd7a1dd03e13e47e4c5e10a647672955ea3 100644 (file)
@@ -788,7 +788,8 @@ static int register_cpu_online(unsigned int cpu)
        if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
                device_create_file(s, &dev_attr_pir);
 
-       if (cpu_has_feature(CPU_FTR_ARCH_206))
+       if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+               !firmware_has_feature(FW_FEATURE_LPAR))
                device_create_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
@@ -873,7 +874,8 @@ static int unregister_cpu_online(unsigned int cpu)
        if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
                device_remove_file(s, &dev_attr_pir);
 
-       if (cpu_has_feature(CPU_FTR_ARCH_206))
+       if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+               !firmware_has_feature(FW_FEATURE_LPAR))
                device_remove_file(s, &dev_attr_tscr);
 #endif /* CONFIG_PPC64 */
 
index 1604110c42386c39dea239aaa1a564ba5bc7b38e..916844f99c64e59655d3372ac4e69c731f0751e9 100644 (file)
@@ -216,6 +216,8 @@ static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
        u32 i, n_lmbs;
 
        n_lmbs = of_read_number(prop++, 1);
+       if (n_lmbs == 0)
+               return;
 
        for (i = 0; i < n_lmbs; i++) {
                read_drconf_v1_cell(&lmb, &prop);
@@ -245,6 +247,8 @@ static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
        u32 i, j, lmb_sets;
 
        lmb_sets = of_read_number(prop++, 1);
+       if (lmb_sets == 0)
+               return;
 
        for (i = 0; i < lmb_sets; i++) {
                read_drconf_v2_cell(&dr_cell, &prop);
@@ -354,6 +358,8 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
        struct drmem_lmb *lmb;
 
        drmem_info->n_lmbs = of_read_number(prop++, 1);
+       if (drmem_info->n_lmbs == 0)
+               return;
 
        drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
                                   GFP_KERNEL);
@@ -373,6 +379,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
        int lmb_index;
 
        lmb_sets = of_read_number(prop++, 1);
+       if (lmb_sets == 0)
+               return;
 
        /* first pass, calculate the number of LMBs */
        p = prop;
index 5a69b51d08a3615f319a325536a209b6399db2a7..d573d7d07f25f4d718043a71e199475d3df9597c 100644 (file)
@@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
         * need to add in 0x1 if it's a read-only user page
         */
        rflags = htab_convert_pte_flags(new_pte);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
 
        if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
            !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -117,7 +117,7 @@ repeat:
                        return -1;
                }
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
        }
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
        return 0;
index 2253bbc6a599d7804cb81dc49b2c6f82b81a435d..e601d95c3b20271d7b9cc6483cab402d51d80436 100644 (file)
@@ -86,7 +86,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 
        subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
        vpn  = hpt_vpn(ea, vsid, ssize);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
        /*
         *None of the sub 4k page is hashed
         */
@@ -214,7 +214,7 @@ repeat:
                return -1;
        }
 
-       new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot);
+       new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
        new_pte |= H_PAGE_HASHPTE;
 
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
@@ -262,7 +262,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
        } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
        rflags = htab_convert_pte_flags(new_pte);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
 
        if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
            !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -327,7 +327,7 @@ repeat:
                }
 
                new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
-               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
        }
        *ptep = __pte(new_pte & ~H_PAGE_BUSY);
        return 0;
index 7d07c7e17db6708334ea38cad711e1f5c32de1c6..cf290d415dcd8e9e314c63134c49cbd687e63fd7 100644 (file)
@@ -1008,6 +1008,7 @@ void __init hash__early_init_mmu(void)
        __pmd_index_size = H_PMD_INDEX_SIZE;
        __pud_index_size = H_PUD_INDEX_SIZE;
        __pgd_index_size = H_PGD_INDEX_SIZE;
+       __pud_cache_index = H_PUD_CACHE_INDEX;
        __pmd_cache_index = H_PMD_CACHE_INDEX;
        __pte_table_size = H_PTE_TABLE_SIZE;
        __pmd_table_size = H_PMD_TABLE_SIZE;
index 12511f5a015fcfee349e9dd7ac00f6ecd3b8df90..b320f5097a0616dce810c31e42fa42659475d4c3 100644 (file)
@@ -27,7 +27,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
        unsigned long vpn;
        unsigned long old_pte, new_pte;
        unsigned long rflags, pa, sz;
-       long slot;
+       long slot, offset;
 
        BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
 
@@ -63,7 +63,11 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
        } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
        rflags = htab_convert_pte_flags(new_pte);
-       rpte = __real_pte(__pte(old_pte), ptep);
+       if (unlikely(mmu_psize == MMU_PAGE_16G))
+               offset = PTRS_PER_PUD;
+       else
+               offset = PTRS_PER_PMD;
+       rpte = __real_pte(__pte(old_pte), ptep, offset);
 
        sz = ((1UL) << shift);
        if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
@@ -104,7 +108,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
                        return -1;
                }
 
-               new_pte |= pte_set_hidx(ptep, rpte, 0, slot);
+               new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
        }
 
        /*
index eb8c6c8c4851a9a7e25a555dd3e61c7b0613fb4e..2b656e67f2eaaa3914cd74d1cd57e36a5060486b 100644 (file)
@@ -100,6 +100,6 @@ void pgtable_cache_init(void)
         * same size as either the pgd or pmd index except with THP enabled
         * on book3s 64
         */
-       if (PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE))
-               pgtable_cache_add(PUD_INDEX_SIZE, pud_ctor);
+       if (PUD_CACHE_INDEX && !PGT_CACHE(PUD_CACHE_INDEX))
+               pgtable_cache_add(PUD_CACHE_INDEX, pud_ctor);
 }
index 314d19ab9385e038a4f38c18a50364da4873eb86..edd8d0bc9364f2843688498b221d90f53647390d 100644 (file)
@@ -143,11 +143,6 @@ static void reset_numa_cpu_lookup_table(void)
                numa_cpu_lookup_table[cpu] = -1;
 }
 
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
-       numa_cpu_lookup_table[cpu] = node;
-}
-
 static void map_cpu_to_node(int cpu, int node)
 {
        update_numa_cpu_lookup_table(cpu, node);
index 573a9a2ee4555c53ab2416e70fb4fdd05fb464da..2e10a964e29080149fe60b5d9a2220fdc710bb79 100644 (file)
 #include <linux/of_fdt.h>
 #include <linux/mm.h>
 #include <linux/string_helpers.h>
+#include <linux/stop_machine.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
 #include <asm/dma.h>
 #include <asm/machdep.h>
 #include <asm/mmu.h>
@@ -333,6 +335,22 @@ static void __init radix_init_pgtable(void)
                     "r" (TLBIEL_INVAL_SET_LPID), "r" (0));
        asm volatile("eieio; tlbsync; ptesync" : : : "memory");
        trace_tlbie(0, 0, TLBIEL_INVAL_SET_LPID, 0, 2, 1, 1);
+
+       /*
+        * The init_mm context is given the first available (non-zero) PID,
+        * which is the "guard PID" and contains no page table. PIDR should
+        * never be set to zero because that duplicates the kernel address
+        * space at the 0x0... offset (quadrant 0)!
+        *
+        * An arbitrary PID that may later be allocated by the PID allocator
+        * for userspace processes must not be used either, because that
+        * would cause stale user mappings for that PID on CPUs outside of
+        * the TLB invalidation scheme (because it won't be in mm_cpumask).
+        *
+        * So permanently carve out one PID for the purpose of a guard PID.
+        */
+       init_mm.context.id = mmu_base_pid;
+       mmu_base_pid++;
 }
 
 static void __init radix_init_partition_table(void)
@@ -535,6 +553,7 @@ void __init radix__early_init_mmu(void)
        __pmd_index_size = RADIX_PMD_INDEX_SIZE;
        __pud_index_size = RADIX_PUD_INDEX_SIZE;
        __pgd_index_size = RADIX_PGD_INDEX_SIZE;
+       __pud_cache_index = RADIX_PUD_INDEX_SIZE;
        __pmd_cache_index = RADIX_PMD_INDEX_SIZE;
        __pte_table_size = RADIX_PTE_TABLE_SIZE;
        __pmd_table_size = RADIX_PMD_TABLE_SIZE;
@@ -579,7 +598,8 @@ void __init radix__early_init_mmu(void)
 
        radix_init_iamr();
        radix_init_pgtable();
-
+       /* Switch to the guard PID before turning on MMU */
+       radix__switch_mmu_context(NULL, &init_mm);
        if (cpu_has_feature(CPU_FTR_HVMODE))
                tlbiel_all();
 }
@@ -604,6 +624,7 @@ void radix__early_init_mmu_secondary(void)
        }
        radix_init_iamr();
 
+       radix__switch_mmu_context(NULL, &init_mm);
        if (cpu_has_feature(CPU_FTR_HVMODE))
                tlbiel_all();
 }
@@ -666,6 +687,30 @@ static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
        pud_clear(pud);
 }
 
+struct change_mapping_params {
+       pte_t *pte;                     /* entry of the huge mapping to clear */
+       unsigned long start;            /* start of the range being removed */
+       unsigned long end;              /* end of the range being removed */
+       unsigned long aligned_start;    /* start rounded down to the mapping size */
+       unsigned long aligned_end;      /* upper bound of the tail to remap */
+};
+/* stop_machine() callback: clear a huge mapping, remap what must stay. */
+static int stop_machine_change_mapping(void *data)
+{
+       struct change_mapping_params *params =
+                       (struct change_mapping_params *)data;
+
+       if (!data)
+               return -1;
+
+       spin_unlock(&init_mm.page_table_lock);  /* NOTE(review): presumably taken by the caller -- confirm */
+       pte_clear(&init_mm, params->aligned_start, params->pte);        /* remove the huge mapping */
+       create_physical_mapping(params->aligned_start, params->start);  /* re-create the part below the removed range */
+       create_physical_mapping(params->end, params->aligned_end);      /* re-create the part above the removed range */
+       spin_lock(&init_mm.page_table_lock);    /* re-take the lock released above */
+       return 0;
+}
+
 static void remove_pte_table(pte_t *pte_start, unsigned long addr,
                             unsigned long end)
 {
@@ -694,6 +739,52 @@ static void remove_pte_table(pte_t *pte_start, unsigned long addr,
        }
 }
 
+/*
+ * Clear a huge mapping; remap the remainder if only part of it goes away.
+ */
+static void split_kernel_mapping(unsigned long addr, unsigned long end,
+                               unsigned long size, pte_t *pte)
+{
+       unsigned long mask = ~(size - 1);       /* size: extent covered by the huge entry */
+       unsigned long aligned_start = addr & mask;
+       unsigned long aligned_end = addr + size;        /* NOTE(review): addr + size, not aligned_start + size -- confirm */
+       struct change_mapping_params params;
+       bool split_region = false;
+
+       if ((end - addr) < size) {
+               /*
+                * Only part of this huge mapping is being removed,
+                * so the entry must be cleared and the remainder
+                * remapped. If the remainder overlaps kernel text
+                * we could be running from the very mapping being
+                * cleared, so refuse to split in that case.
+                */
+               if (overlaps_kernel_text(aligned_start, addr) ||
+                       overlaps_kernel_text(end, aligned_end)) {
+                       /*
+                        * Hack: warn and return without calling pte_clear
+                        */
+                       WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
+                                 "text, not splitting\n", addr, end);
+                       return;
+               }
+               split_region = true;
+       }
+
+       if (split_region) {
+               params.pte = pte;
+               params.start = addr;
+               params.end = end;
+               params.aligned_start = addr & ~(size - 1);
+               params.aligned_end = min_t(unsigned long, aligned_end,
+                               (unsigned long)__va(memblock_end_of_DRAM()));   /* never remap past the end of DRAM */
+               stop_machine(stop_machine_change_mapping, &params, NULL);
+               return;
+       }
+
+       pte_clear(&init_mm, addr, pte); /* whole huge mapping is removed: just clear it */
+}
+
 static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
                             unsigned long end)
 {
@@ -709,13 +800,7 @@ static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
                        continue;
 
                if (pmd_huge(*pmd)) {
-                       if (!IS_ALIGNED(addr, PMD_SIZE) ||
-                           !IS_ALIGNED(next, PMD_SIZE)) {
-                               WARN_ONCE(1, "%s: unaligned range\n", __func__);
-                               continue;
-                       }
-
-                       pte_clear(&init_mm, addr, (pte_t *)pmd);
+                       split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
                        continue;
                }
 
@@ -740,13 +825,7 @@ static void remove_pud_table(pud_t *pud_start, unsigned long addr,
                        continue;
 
                if (pud_huge(*pud)) {
-                       if (!IS_ALIGNED(addr, PUD_SIZE) ||
-                           !IS_ALIGNED(next, PUD_SIZE)) {
-                               WARN_ONCE(1, "%s: unaligned range\n", __func__);
-                               continue;
-                       }
-
-                       pte_clear(&init_mm, addr, (pte_t *)pud);
+                       split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
                        continue;
                }
 
@@ -772,13 +851,7 @@ static void remove_pagetable(unsigned long start, unsigned long end)
                        continue;
 
                if (pgd_huge(*pgd)) {
-                       if (!IS_ALIGNED(addr, PGDIR_SIZE) ||
-                           !IS_ALIGNED(next, PGDIR_SIZE)) {
-                               WARN_ONCE(1, "%s: unaligned range\n", __func__);
-                               continue;
-                       }
-
-                       pte_clear(&init_mm, addr, (pte_t *)pgd);
+                       split_kernel_mapping(addr, end, PGDIR_SIZE, (pte_t *)pgd);
                        continue;
                }
 
index c9a623c2d8a270a14966003258f035fb95d08cff..28c980eb4422284d788716e245934c679925ad86 100644 (file)
@@ -82,6 +82,8 @@ unsigned long __pgd_index_size;
 EXPORT_SYMBOL(__pgd_index_size);
 unsigned long __pmd_cache_index;
 EXPORT_SYMBOL(__pmd_cache_index);
+unsigned long __pud_cache_index;
+EXPORT_SYMBOL(__pud_cache_index);
 unsigned long __pte_table_size;
 EXPORT_SYMBOL(__pte_table_size);
 unsigned long __pmd_table_size;
@@ -471,6 +473,8 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
        if (old & PATB_HR) {
                asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
                             "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+               asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
+                            "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
                trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
        } else {
                asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
index 881ebd53ffc27c8840ae57b088c4d247ba7ef191..9b23f12e863cc14ff324b9c5ffed077c3a1012e8 100644 (file)
@@ -51,7 +51,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
        unsigned int psize;
        int ssize;
        real_pte_t rpte;
-       int i;
+       int i, offset;
 
        i = batch->index;
 
@@ -67,6 +67,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
                psize = get_slice_psize(mm, addr);
                /* Mask the address for the correct page size */
                addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+               if (unlikely(psize == MMU_PAGE_16G))
+                       offset = PTRS_PER_PUD;
+               else
+                       offset = PTRS_PER_PMD;
 #else
                BUG();
                psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
@@ -78,6 +82,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
                 * support 64k pages, this might be different from the
                 * hardware page size encoded in the slice table. */
                addr &= PAGE_MASK;
+               offset = PTRS_PER_PTE;
        }
 
 
@@ -91,7 +96,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
        }
        WARN_ON(vsid == 0);
        vpn = hpt_vpn(addr, vsid, ssize);
-       rpte = __real_pte(__pte(pte), ptep);
+       rpte = __real_pte(__pte(pte), ptep, offset);
 
        /*
         * Check if we have an active batch on this CPU. If not, just
index dd4c9b8b8a81e6967b29061014918b4f591921df..f6f55ab4980e7684a09942a510daf689f79f6d1c 100644 (file)
@@ -199,9 +199,11 @@ static void disable_nest_pmu_counters(void)
        const struct cpumask *l_cpumask;
 
        get_online_cpus();
-       for_each_online_node(nid) {
+       for_each_node_with_cpus(nid) {
                l_cpumask = cpumask_of_node(nid);
-               cpu = cpumask_first(l_cpumask);
+               cpu = cpumask_first_and(l_cpumask, cpu_online_mask);
+               if (cpu >= nr_cpu_ids)
+                       continue;
                opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
                                       get_hard_smp_processor_id(cpu));
        }
index 2b3eb01ab1107145395b0c697da4743bd2e97c8c..b7c53a51c31bbe5ba5fa62adbf6d1e97c3c1e8c3 100644 (file)
@@ -1063,16 +1063,16 @@ struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
                        rc = PTR_ERR(txwin->paste_kaddr);
                        goto free_window;
                }
+       } else {
+               /*
+                * A user mapping must ensure that context switch issues
+                * CP_ABORT for this thread.
+                */
+               rc = set_thread_uses_vas();
+               if (rc)
+                       goto free_window;
        }
 
-       /*
-        * Now that we have a send window, ensure context switch issues
-        * CP_ABORT for this thread.
-        */
-       rc = -EINVAL;
-       if (set_thread_uses_vas() < 0)
-               goto free_window;
-
        set_vinst_win(vinst, txwin);
 
        return txwin;
index dceb51454d8d212a5cbc78ef891322bddf499800..652d3e96b812b93834323e1a3a60a1e5bbab3612 100644 (file)
@@ -36,6 +36,7 @@
 #include <asm/xics.h>
 #include <asm/xive.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
 
 #include "pseries.h"
 #include "offline_states.h"
@@ -331,6 +332,7 @@ static void pseries_remove_processor(struct device_node *np)
                        BUG_ON(cpu_online(cpu));
                        set_cpu_present(cpu, false);
                        set_hard_smp_processor_id(cpu, -1);
+                       update_numa_cpu_lookup_table(cpu, -1);
                        break;
                }
                if (cpu >= nr_cpu_ids)
@@ -340,8 +342,6 @@ static void pseries_remove_processor(struct device_node *np)
        cpu_maps_update_done();
 }
 
-extern int find_and_online_cpu_nid(int cpu);
-
 static int dlpar_online_cpu(struct device_node *dn)
 {
        int rc = 0;
index 81d8614e73790b1923a3c8cfd336a2531fee76ce..5e1ef915018208c3511ef0e91c0064c8c9474389 100644 (file)
@@ -48,6 +48,28 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
 
 
+/*
+ * Enable the hotplug interrupts late because processing them may touch other
+ * devices or systems (e.g. hugepages) that have not been initialized at the
+ * subsys stage.
+ */
+int __init init_ras_hotplug_IRQ(void)
+{
+       struct device_node *np;
+
+       /* Hotplug Events */
+       np = of_find_node_by_path("/event-sources/hot-plug-events");
+       if (np != NULL) {
+               if (dlpar_workqueue_init() == 0)
+                       request_event_sources_irqs(np, ras_hotplug_interrupt,
+                                                  "RAS_HOTPLUG");
+               of_node_put(np);
+       }
+
+       return 0;
+}
+machine_late_initcall(pseries, init_ras_hotplug_IRQ);
+
 /*
  * Initialize handlers for the set of interrupts caused by hardware errors
  * and power system events.
@@ -66,15 +88,6 @@ static int __init init_ras_IRQ(void)
                of_node_put(np);
        }
 
-       /* Hotplug Events */
-       np = of_find_node_by_path("/event-sources/hot-plug-events");
-       if (np != NULL) {
-               if (dlpar_workqueue_init() == 0)
-                       request_event_sources_irqs(np, ras_hotplug_interrupt,
-                                          "RAS_HOTPLUG");
-               of_node_put(np);
-       }
-
        /* EPOW Events */
        np = of_find_node_by_path("/event-sources/epow-events");
        if (np != NULL) {
index d9c4c93660491849029044d37ab8e60160b0d7de..091f1d0d0af190a0d6f274b8cab32960cc1eef86 100644 (file)
@@ -356,7 +356,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
 
        rc = plpar_int_get_queue_info(0, target, prio, &esn_page, &esn_size);
        if (rc) {
-               pr_err("Error %lld getting queue info prio %d\n", rc, prio);
+               pr_err("Error %lld getting queue info CPU %d prio %d\n", rc,
+                      target, prio);
                rc = -EIO;
                goto fail;
        }
@@ -370,7 +371,8 @@ static int xive_spapr_configure_queue(u32 target, struct xive_q *q, u8 prio,
        /* Configure and enable the queue in HW */
        rc = plpar_int_set_queue_config(flags, target, prio, qpage_phys, order);
        if (rc) {
-               pr_err("Error %lld setting queue for prio %d\n", rc, prio);
+               pr_err("Error %lld setting queue for CPU %d prio %d\n", rc,
+                      target, prio);
                rc = -EIO;
        } else {
                q->qpage = qpage;
@@ -389,8 +391,8 @@ static int xive_spapr_setup_queue(unsigned int cpu, struct xive_cpu *xc,
        if (IS_ERR(qpage))
                return PTR_ERR(qpage);
 
-       return xive_spapr_configure_queue(cpu, q, prio, qpage,
-                                         xive_queue_shift);
+       return xive_spapr_configure_queue(get_hard_smp_processor_id(cpu),
+                                         q, prio, qpage, xive_queue_shift);
 }
 
 static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
@@ -399,10 +401,12 @@ static void xive_spapr_cleanup_queue(unsigned int cpu, struct xive_cpu *xc,
        struct xive_q *q = &xc->queue[prio];
        unsigned int alloc_order;
        long rc;
+       int hw_cpu = get_hard_smp_processor_id(cpu);
 
-       rc = plpar_int_set_queue_config(0, cpu, prio, 0, 0);
+       rc = plpar_int_set_queue_config(0, hw_cpu, prio, 0, 0);
        if (rc)
-               pr_err("Error %ld setting queue for prio %d\n", rc, prio);
+               pr_err("Error %ld setting queue for CPU %d prio %d\n", rc,
+                      hw_cpu, prio);
 
        alloc_order = xive_alloc_order(xive_queue_shift);
        free_pages((unsigned long)q->qpage, alloc_order);
index 6bf594ace663ec82f746132b4f62fc351bf5160c..8767e45f1b2b70953583a7a7157707696466d407 100644 (file)
@@ -430,6 +430,8 @@ config SPARC_LEON
        depends on SPARC32
        select USB_EHCI_BIG_ENDIAN_MMIO
        select USB_EHCI_BIG_ENDIAN_DESC
+       select USB_UHCI_BIG_ENDIAN_MMIO
+       select USB_UHCI_BIG_ENDIAN_DESC
        ---help---
          Say Y here if you are running on a SPARC-LEON processor.
          The LEON processor is a synthesizable VHDL model of the
index aff152c87cf4ba62ed26ed6b7eb567b576dd84fd..5a82bac5e0bc7985529aa537e981997109945309 100644 (file)
@@ -1,6 +1,7 @@
 boot/compressed/vmlinux
 tools/test_get_len
 tools/insn_sanity
+tools/insn_decoder_test
 purgatory/kexec-purgatory.c
 purgatory/purgatory.ro
 
index 63bf349b2b24a8807c4f65869af50bab99e4c2f7..c1236b187824e222a2c7fddd417272369767b06b 100644 (file)
@@ -423,12 +423,6 @@ config X86_MPPARSE
          For old smp systems that do not have proper acpi support. Newer systems
          (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
 
-config X86_BIGSMP
-       bool "Support for big SMP systems with more than 8 CPUs"
-       depends on X86_32 && SMP
-       ---help---
-         This option is needed for the systems that have more than 8 CPUs
-
 config GOLDFISH
        def_bool y
        depends on X86_GOLDFISH
@@ -460,6 +454,12 @@ config INTEL_RDT
          Say N if unsure.
 
 if X86_32
+config X86_BIGSMP
+       bool "Support for big SMP systems with more than 8 CPUs"
+       depends on SMP
+       ---help---
+         This option is needed for systems that have more than 8 CPUs.
+
 config X86_EXTENDED_PLATFORM
        bool "Support for extended (non-PC) x86 platforms"
        default y
@@ -949,25 +949,66 @@ config MAXSMP
          Enable maximum number of CPUS and NUMA Nodes for this architecture.
          If unsure, say N.
 
+#
+# The maximum number of CPUs supported:
+#
+# The main config value is NR_CPUS, which defaults to NR_CPUS_DEFAULT,
+# and which can be configured interactively in the
+# [NR_CPUS_RANGE_BEGIN ... NR_CPUS_RANGE_END] range.
+#
+# The ranges are different on 32-bit and 64-bit kernels, depending on
+# hardware capabilities and scalability features of the kernel.
+#
+# ( If MAXSMP is enabled we just use the highest possible value and disable
+#   interactive configuration. )
+#
+
+config NR_CPUS_RANGE_BEGIN
+       int
+       default NR_CPUS_RANGE_END if MAXSMP
+       default    1 if !SMP
+       default    2
+
+config NR_CPUS_RANGE_END
+       int
+       depends on X86_32
+       default   64 if  SMP &&  X86_BIGSMP
+       default    8 if  SMP && !X86_BIGSMP
+       default    1 if !SMP
+
+config NR_CPUS_RANGE_END
+       int
+       depends on X86_64
+       default 8192 if  SMP && ( MAXSMP ||  CPUMASK_OFFSTACK)
+       default  512 if  SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+       default    1 if !SMP
+
+config NR_CPUS_DEFAULT
+       int
+       depends on X86_32
+       default   32 if  X86_BIGSMP
+       default    8 if  SMP
+       default    1 if !SMP
+
+config NR_CPUS_DEFAULT
+       int
+       depends on X86_64
+       default 8192 if  MAXSMP
+       default   64 if  SMP
+       default    1 if !SMP
+
 config NR_CPUS
        int "Maximum number of CPUs" if SMP && !MAXSMP
-       range 2 8 if SMP && X86_32 && !X86_BIGSMP
-       range 2 64 if SMP && X86_32 && X86_BIGSMP
-       range 2 512 if SMP && !MAXSMP && !CPUMASK_OFFSTACK && X86_64
-       range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
-       default "1" if !SMP
-       default "8192" if MAXSMP
-       default "32" if SMP && X86_BIGSMP
-       default "8" if SMP && X86_32
-       default "64" if SMP
+       range NR_CPUS_RANGE_BEGIN NR_CPUS_RANGE_END
+       default NR_CPUS_DEFAULT
        ---help---
          This allows you to specify the maximum number of CPUs which this
          kernel will support.  If CPUMASK_OFFSTACK is enabled, the maximum
          supported value is 8192, otherwise the maximum value is 512.  The
          minimum value which makes sense is 2.
 
-         This is purely to save memory - each supported CPU adds
-         approximately eight kilobytes to the kernel image.
+         This is purely to save memory: each supported CPU adds about 8KB
+         to the kernel image.
 
 config SCHED_SMT
        bool "SMT (Hyperthreading) scheduler support"
@@ -1363,7 +1404,7 @@ config HIGHMEM4G
 
 config HIGHMEM64G
        bool "64GB"
-       depends on !M486
+       depends on !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6
        select X86_PAE
        ---help---
          Select this if you have a 32-bit processor and more than 4
index 65a9a4716e34f55394d057335629d0b32ec55ada..8b8d2297d4867b06acaecf31e3be7124888f4d79 100644 (file)
@@ -374,7 +374,7 @@ config X86_TSC
 
 config X86_CMPXCHG64
        def_bool y
-       depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
+       depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8
 
 # this should be set for all -march=.. options where the compiler
 # generates cmov.
@@ -385,7 +385,7 @@ config X86_CMOV
 config X86_MINIMUM_CPU_FAMILY
        int
        default "64" if X86_64
-       default "6" if X86_32 && X86_P6_NOP
+       default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
        default "5" if X86_32 && X86_CMPXCHG64
        default "4"
 
index 36870b26067a73655b96d137c04977c8f392237d..d08805032f0193ab96dde00890a0502a9f2b24e6 100644 (file)
@@ -57,10 +57,12 @@ void sha512_mb_mgr_init_avx2(struct sha512_mb_mgr *state)
 {
        unsigned int j;
 
-       state->lens[0] = 0;
-       state->lens[1] = 1;
-       state->lens[2] = 2;
-       state->lens[3] = 3;
+       /* initially all lanes are unused */
+       state->lens[0] = 0xFFFFFFFF00000000;
+       state->lens[1] = 0xFFFFFFFF00000001;
+       state->lens[2] = 0xFFFFFFFF00000002;
+       state->lens[3] = 0xFFFFFFFF00000003;
+
        state->unused_lanes = 0xFF03020100;
        for (j = 0; j < 4; j++)
                state->ldata[j].job_in_lane = NULL;
index 3f48f695d5e6ac6546a009c734fcac517564b24d..dce7092ab24a247c1165f80b17c687d255023a05 100644 (file)
@@ -97,80 +97,69 @@ For 32-bit we have the following conventions - kernel is built with
 
 #define SIZEOF_PTREGS  21*8
 
-       .macro ALLOC_PT_GPREGS_ON_STACK
-       addq    $-(15*8), %rsp
-       .endm
+.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax
+       /*
+        * Push registers and sanitize registers of values that a
+        * speculation attack might otherwise want to exploit. The
+        * lower registers are likely clobbered well before they
+        * could be put to use in a speculative execution gadget.
+        * Interleave XOR with PUSH for better uop scheduling:
+        */
+       pushq   %rdi            /* pt_regs->di */
+       pushq   %rsi            /* pt_regs->si */
+       pushq   \rdx            /* pt_regs->dx */
+       pushq   %rcx            /* pt_regs->cx */
+       pushq   \rax            /* pt_regs->ax */
+       pushq   %r8             /* pt_regs->r8 */
+       xorq    %r8, %r8        /* nospec   r8 */
+       pushq   %r9             /* pt_regs->r9 */
+       xorq    %r9, %r9        /* nospec   r9 */
+       pushq   %r10            /* pt_regs->r10 */
+       xorq    %r10, %r10      /* nospec   r10 */
+       pushq   %r11            /* pt_regs->r11 */
+       xorq    %r11, %r11      /* nospec   r11*/
+       pushq   %rbx            /* pt_regs->rbx */
+       xorl    %ebx, %ebx      /* nospec   rbx*/
+       pushq   %rbp            /* pt_regs->rbp */
+       xorl    %ebp, %ebp      /* nospec   rbp*/
+       pushq   %r12            /* pt_regs->r12 */
+       xorq    %r12, %r12      /* nospec   r12*/
+       pushq   %r13            /* pt_regs->r13 */
+       xorq    %r13, %r13      /* nospec   r13*/
+       pushq   %r14            /* pt_regs->r14 */
+       xorq    %r14, %r14      /* nospec   r14*/
+       pushq   %r15            /* pt_regs->r15 */
+       xorq    %r15, %r15      /* nospec   r15*/
+       UNWIND_HINT_REGS
+.endm
 
-       .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
-       .if \r11
-       movq %r11, 6*8+\offset(%rsp)
-       .endif
-       .if \r8910
-       movq %r10, 7*8+\offset(%rsp)
-       movq %r9,  8*8+\offset(%rsp)
-       movq %r8,  9*8+\offset(%rsp)
-       .endif
-       .if \rax
-       movq %rax, 10*8+\offset(%rsp)
-       .endif
-       .if \rcx
-       movq %rcx, 11*8+\offset(%rsp)
-       .endif
-       movq %rdx, 12*8+\offset(%rsp)
-       movq %rsi, 13*8+\offset(%rsp)
-       movq %rdi, 14*8+\offset(%rsp)
-       UNWIND_HINT_REGS offset=\offset extra=0
-       .endm
-       .macro SAVE_C_REGS offset=0
-       SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
-       SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_R891011
-       SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_RCX_R891011
-       SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
-       .endm
-       .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
-       SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
-       .endm
-
-       .macro SAVE_EXTRA_REGS offset=0
-       movq %r15, 0*8+\offset(%rsp)
-       movq %r14, 1*8+\offset(%rsp)
-       movq %r13, 2*8+\offset(%rsp)
-       movq %r12, 3*8+\offset(%rsp)
-       movq %rbp, 4*8+\offset(%rsp)
-       movq %rbx, 5*8+\offset(%rsp)
-       UNWIND_HINT_REGS offset=\offset
-       .endm
-
-       .macro POP_EXTRA_REGS
+.macro POP_REGS pop_rdi=1 skip_r11rcx=0
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbp
        popq %rbx
-       .endm
-
-       .macro POP_C_REGS
+       .if \skip_r11rcx
+       popq %rsi
+       .else
        popq %r11
+       .endif
        popq %r10
        popq %r9
        popq %r8
        popq %rax
+       .if \skip_r11rcx
+       popq %rsi
+       .else
        popq %rcx
+       .endif
        popq %rdx
        popq %rsi
+       .if \pop_rdi
        popq %rdi
-       .endm
-
-       .macro icebp
-       .byte 0xf1
-       .endm
+       .endif
+.endm
 
 /*
  * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
@@ -178,7 +167,7 @@ For 32-bit we have the following conventions - kernel is built with
  * is just setting the LSB, which makes it an invalid stack address and is also
  * a signal to the unwinder that it's a pt_regs pointer in disguise.
  *
- * NOTE: This macro must be used *after* SAVE_EXTRA_REGS because it corrupts
+ * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts
  * the original rbp.
  */
 .macro ENCODE_FRAME_POINTER ptregs_offset=0
index 30c8c5344c4a5dcfeb96d0711a322e50de33d324..8971bd64d515c5bb4a9b95108fd802b8418764f2 100644 (file)
@@ -213,7 +213,7 @@ ENTRY(entry_SYSCALL_64)
 
        swapgs
        /*
-        * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+        * This path is only taken when PAGE_TABLE_ISOLATION is disabled so it
         * is not required to switch CR3.
         */
        movq    %rsp, PER_CPU_VAR(rsp_scratch)
@@ -227,22 +227,8 @@ ENTRY(entry_SYSCALL_64)
        pushq   %rcx                            /* pt_regs->ip */
 GLOBAL(entry_SYSCALL_64_after_hwframe)
        pushq   %rax                            /* pt_regs->orig_ax */
-       pushq   %rdi                            /* pt_regs->di */
-       pushq   %rsi                            /* pt_regs->si */
-       pushq   %rdx                            /* pt_regs->dx */
-       pushq   %rcx                            /* pt_regs->cx */
-       pushq   $-ENOSYS                        /* pt_regs->ax */
-       pushq   %r8                             /* pt_regs->r8 */
-       pushq   %r9                             /* pt_regs->r9 */
-       pushq   %r10                            /* pt_regs->r10 */
-       pushq   %r11                            /* pt_regs->r11 */
-       pushq   %rbx                            /* pt_regs->rbx */
-       pushq   %rbp                            /* pt_regs->rbp */
-       pushq   %r12                            /* pt_regs->r12 */
-       pushq   %r13                            /* pt_regs->r13 */
-       pushq   %r14                            /* pt_regs->r14 */
-       pushq   %r15                            /* pt_regs->r15 */
-       UNWIND_HINT_REGS
+
+       PUSH_AND_CLEAR_REGS rax=$-ENOSYS
 
        TRACE_IRQS_OFF
 
@@ -321,15 +307,7 @@ GLOBAL(entry_SYSCALL_64_after_hwframe)
 syscall_return_via_sysret:
        /* rcx and r11 are already restored (see code above) */
        UNWIND_HINT_EMPTY
-       POP_EXTRA_REGS
-       popq    %rsi    /* skip r11 */
-       popq    %r10
-       popq    %r9
-       popq    %r8
-       popq    %rax
-       popq    %rsi    /* skip rcx */
-       popq    %rdx
-       popq    %rsi
+       POP_REGS pop_rdi=0 skip_r11rcx=1
 
        /*
         * Now all regs are restored except RSP and RDI.
@@ -559,9 +537,7 @@ END(irq_entries_start)
        call    switch_to_thread_stack
 1:
 
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-       SAVE_EXTRA_REGS
+       PUSH_AND_CLEAR_REGS
        ENCODE_FRAME_POINTER
 
        testb   $3, CS(%rsp)
@@ -622,15 +598,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
        ud2
 1:
 #endif
-       POP_EXTRA_REGS
-       popq    %r11
-       popq    %r10
-       popq    %r9
-       popq    %r8
-       popq    %rax
-       popq    %rcx
-       popq    %rdx
-       popq    %rsi
+       POP_REGS pop_rdi=0
 
        /*
         * The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
@@ -688,8 +656,7 @@ GLOBAL(restore_regs_and_return_to_kernel)
        ud2
 1:
 #endif
-       POP_EXTRA_REGS
-       POP_C_REGS
+       POP_REGS
        addq    $8, %rsp        /* skip regs->orig_ax */
        /*
         * ARCH_HAS_MEMBARRIER_SYNC_CORE relies on IRET core serialization
@@ -908,7 +875,9 @@ ENTRY(\sym)
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
        .endif
 
-       ALLOC_PT_GPREGS_ON_STACK
+       /* Save all registers in pt_regs */
+       PUSH_AND_CLEAR_REGS
+       ENCODE_FRAME_POINTER
 
        .if \paranoid < 2
        testb   $3, CS(%rsp)                    /* If coming from userspace, switch stacks */
@@ -1121,9 +1090,7 @@ ENTRY(xen_failsafe_callback)
        addq    $0x30, %rsp
        UNWIND_HINT_IRET_REGS
        pushq   $-1 /* orig_ax = -1 => not a system call */
-       ALLOC_PT_GPREGS_ON_STACK
-       SAVE_C_REGS
-       SAVE_EXTRA_REGS
+       PUSH_AND_CLEAR_REGS
        ENCODE_FRAME_POINTER
        jmp     error_exit
 END(xen_failsafe_callback)
@@ -1163,16 +1130,13 @@ idtentry machine_check          do_mce                  has_error_code=0        paranoid=1
 #endif
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Switch gs if needed.
  * Use slow, but surefire "are we in kernel?" check.
  * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(paranoid_entry)
        UNWIND_HINT_FUNC
        cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-       ENCODE_FRAME_POINTER 8
        movl    $1, %ebx
        movl    $MSR_GS_BASE, %ecx
        rdmsr
@@ -1211,21 +1175,18 @@ ENTRY(paranoid_exit)
        jmp     .Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
        TRACE_IRQS_IRETQ_DEBUG
+       RESTORE_CR3     scratch_reg=%rbx save_reg=%r14
 .Lparanoid_exit_restore:
        jmp restore_regs_and_return_to_kernel
 END(paranoid_exit)
 
 /*
- * Save all registers in pt_regs, and switch gs if needed.
+ * Switch gs if needed.
  * Return: EBX=0: came from user mode; EBX=1: otherwise
  */
 ENTRY(error_entry)
-       UNWIND_HINT_FUNC
+       UNWIND_HINT_REGS offset=8
        cld
-       SAVE_C_REGS 8
-       SAVE_EXTRA_REGS 8
-       ENCODE_FRAME_POINTER 8
-       xorl    %ebx, %ebx
        testb   $3, CS+8(%rsp)
        jz      .Lerror_kernelspace
 
@@ -1406,22 +1367,7 @@ ENTRY(nmi)
        pushq   1*8(%rdx)       /* pt_regs->rip */
        UNWIND_HINT_IRET_REGS
        pushq   $-1             /* pt_regs->orig_ax */
-       pushq   %rdi            /* pt_regs->di */
-       pushq   %rsi            /* pt_regs->si */
-       pushq   (%rdx)          /* pt_regs->dx */
-       pushq   %rcx            /* pt_regs->cx */
-       pushq   %rax            /* pt_regs->ax */
-       pushq   %r8             /* pt_regs->r8 */
-       pushq   %r9             /* pt_regs->r9 */
-       pushq   %r10            /* pt_regs->r10 */
-       pushq   %r11            /* pt_regs->r11 */
-       pushq   %rbx            /* pt_regs->rbx */
-       pushq   %rbp            /* pt_regs->rbp */
-       pushq   %r12            /* pt_regs->r12 */
-       pushq   %r13            /* pt_regs->r13 */
-       pushq   %r14            /* pt_regs->r14 */
-       pushq   %r15            /* pt_regs->r15 */
-       UNWIND_HINT_REGS
+       PUSH_AND_CLEAR_REGS rdx=(%rdx)
        ENCODE_FRAME_POINTER
 
        /*
@@ -1631,7 +1577,8 @@ end_repeat_nmi:
         * frame to point back to repeat_nmi.
         */
        pushq   $-1                             /* ORIG_RAX: no syscall to restart */
-       ALLOC_PT_GPREGS_ON_STACK
+       PUSH_AND_CLEAR_REGS
+       ENCODE_FRAME_POINTER
 
        /*
         * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
@@ -1655,8 +1602,7 @@ end_repeat_nmi:
 nmi_swapgs:
        SWAPGS_UNSAFE_STACK
 nmi_restore:
-       POP_EXTRA_REGS
-       POP_C_REGS
+       POP_REGS
 
        /*
         * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
index 98d5358e4041a7e144ec566f7db19ff054cedbcc..fd65e016e4133f5634545062d098fc25fb5c0b1d 100644 (file)
@@ -85,15 +85,25 @@ ENTRY(entry_SYSENTER_compat)
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
+       xorq    %r8, %r8                /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
+       xorq    %r9, %r9                /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
+       xorq    %r10, %r10              /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
+       xorq    %r11, %r11              /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
+       xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   $0                      /* pt_regs->r12 = 0 */
+       xorq    %r12, %r12              /* nospec   r12 */
        pushq   $0                      /* pt_regs->r13 = 0 */
+       xorq    %r13, %r13              /* nospec   r13 */
        pushq   $0                      /* pt_regs->r14 = 0 */
+       xorq    %r14, %r14              /* nospec   r14 */
        pushq   $0                      /* pt_regs->r15 = 0 */
+       xorq    %r15, %r15              /* nospec   r15 */
        cld
 
        /*
@@ -214,15 +224,25 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
        pushq   %rbp                    /* pt_regs->cx (stashed in bp) */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
+       xorq    %r8, %r8                /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
+       xorq    %r9, %r9                /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
+       xorq    %r10, %r10              /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
+       xorq    %r11, %r11              /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp (will be overwritten) */
+       xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   $0                      /* pt_regs->r12 = 0 */
+       xorq    %r12, %r12              /* nospec   r12 */
        pushq   $0                      /* pt_regs->r13 = 0 */
+       xorq    %r13, %r13              /* nospec   r13 */
        pushq   $0                      /* pt_regs->r14 = 0 */
+       xorq    %r14, %r14              /* nospec   r14 */
        pushq   $0                      /* pt_regs->r15 = 0 */
+       xorq    %r15, %r15              /* nospec   r15 */
 
        /*
         * User mode is traced as though IRQs are on, and SYSENTER
@@ -338,15 +358,25 @@ ENTRY(entry_INT80_compat)
        pushq   %rcx                    /* pt_regs->cx */
        pushq   $-ENOSYS                /* pt_regs->ax */
        pushq   $0                      /* pt_regs->r8  = 0 */
+       xorq    %r8, %r8                /* nospec   r8 */
        pushq   $0                      /* pt_regs->r9  = 0 */
+       xorq    %r9, %r9                /* nospec   r9 */
        pushq   $0                      /* pt_regs->r10 = 0 */
+       xorq    %r10, %r10              /* nospec   r10 */
        pushq   $0                      /* pt_regs->r11 = 0 */
+       xorq    %r11, %r11              /* nospec   r11 */
        pushq   %rbx                    /* pt_regs->rbx */
+       xorl    %ebx, %ebx              /* nospec   rbx */
        pushq   %rbp                    /* pt_regs->rbp */
+       xorl    %ebp, %ebp              /* nospec   rbp */
        pushq   %r12                    /* pt_regs->r12 */
+       xorq    %r12, %r12              /* nospec   r12 */
        pushq   %r13                    /* pt_regs->r13 */
+       xorq    %r13, %r13              /* nospec   r13 */
        pushq   %r14                    /* pt_regs->r14 */
+       xorq    %r14, %r14              /* nospec   r14 */
        pushq   %r15                    /* pt_regs->r15 */
+       xorq    %r15, %r15              /* nospec   r15 */
        cld
 
        /*
index 731153a4681e73f761dea8c0c15ce6757b89860e..56457cb73448b494b1aed2138fcee8e13d50d81b 100644 (file)
@@ -3559,7 +3559,7 @@ static int intel_snb_pebs_broken(int cpu)
                break;
 
        case INTEL_FAM6_SANDYBRIDGE_X:
-               switch (cpu_data(cpu).x86_mask) {
+               switch (cpu_data(cpu).x86_stepping) {
                case 6: rev = 0x618; break;
                case 7: rev = 0x70c; break;
                }
index ae64d0b69729dbb23c436d6c3bbf360a3cafb53b..cf372b90557ed4e8a788c8f97b515ac956d2512e 100644 (file)
@@ -1186,7 +1186,7 @@ void __init intel_pmu_lbr_init_atom(void)
         * on PMU interrupt
         */
        if (boot_cpu_data.x86_model == 28
-           && boot_cpu_data.x86_mask < 10) {
+           && boot_cpu_data.x86_stepping < 10) {
                pr_cont("LBR disabled due to erratum");
                return;
        }
index a5604c3529308b7cfc1dc336a8d53496d531659b..408879b0c0d4e41c56906d2464734279c9360f7e 100644 (file)
@@ -234,7 +234,7 @@ static __initconst const struct x86_pmu p6_pmu = {
 
 static __init void p6_pmu_rdpmc_quirk(void)
 {
-       if (boot_cpu_data.x86_mask < 9) {
+       if (boot_cpu_data.x86_stepping < 9) {
                /*
                 * PPro erratum 26; fixed in stepping 9 and above.
                 */
index 44f5d79d51056b036e7ef4536d7bfe340dae9747..11881726ed37290128f9bca07b9944aa2d818677 100644 (file)
@@ -94,7 +94,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
        if (boot_cpu_data.x86 == 0x0F &&
            boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
            boot_cpu_data.x86_model <= 0x05 &&
-           boot_cpu_data.x86_mask < 0x0A)
+           boot_cpu_data.x86_stepping < 0x0A)
                return 1;
        else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
                return 1;
index 30d40614601641b9fc2dbd5ce734ff86d0cdd731..e1259f043ae999fa21e1f998431ab12cd73a11ea 100644 (file)
@@ -40,7 +40,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 
        asm ("cmp %1,%2; sbb %0,%0;"
                        :"=r" (mask)
-                       :"r"(size),"r" (index)
+                       :"g"(size),"r" (index)
                        :"cc");
        return mask;
 }
index 34d99af43994453e1cec89aa202cc17b052ee690..6804d66427673ec314659944e65052b5dfba273e 100644 (file)
@@ -5,23 +5,20 @@
 #include <linux/stringify.h>
 
 /*
- * Since some emulators terminate on UD2, we cannot use it for WARN.
- * Since various instruction decoders disagree on the length of UD1,
- * we cannot use it either. So use UD0 for WARN.
+ * Although some emulators terminate on UD2, we use it for WARN().
  *
- * (binutils knows about "ud1" but {en,de}codes it as 2 bytes, whereas
- *  our kernel decoder thinks it takes a ModRM byte, which seems consistent
- *  with various things like the Intel SDM instruction encoding rules)
+ * UD0 and UD1 are avoided because various instruction decoders/specs
+ * disagree on their encoding.
  */
 
-#define ASM_UD0                ".byte 0x0f, 0xff"
+#define ASM_UD0                ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
 #define ASM_UD1                ".byte 0x0f, 0xb9" /* + ModRM */
 #define ASM_UD2                ".byte 0x0f, 0x0b"
 
 #define INSN_UD0       0xff0f
 #define INSN_UD2       0x0b0f
 
-#define LEN_UD0                2
+#define LEN_UD2                2
 
 #ifdef CONFIG_GENERIC_BUG
 
@@ -77,7 +74,11 @@ do {                                                         \
        unreachable();                                          \
 } while (0)
 
-#define __WARN_FLAGS(flags)    _BUG_FLAGS(ASM_UD0, BUGFLAG_WARNING|(flags))
+#define __WARN_FLAGS(flags)                                    \
+do {                                                           \
+       _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags));           \
+       annotate_reachable();                                   \
+} while (0)
 
 #include <asm-generic/bug.h>
 
index 70eddb3922ff7b3e44fc27b9426da39aa4e2b6fa..736771c9822ef965233b7114fd0b1a025e8c3a46 100644 (file)
@@ -148,45 +148,46 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
  */
 static __always_inline __pure bool _static_cpu_has(u16 bit)
 {
-               asm_volatile_goto("1: jmp 6f\n"
-                        "2:\n"
-                        ".skip -(((5f-4f) - (2b-1b)) > 0) * "
-                                "((5f-4f) - (2b-1b)),0x90\n"
-                        "3:\n"
-                        ".section .altinstructions,\"a\"\n"
-                        " .long 1b - .\n"              /* src offset */
-                        " .long 4f - .\n"              /* repl offset */
-                        " .word %P1\n"                 /* always replace */
-                        " .byte 3b - 1b\n"             /* src len */
-                        " .byte 5f - 4f\n"             /* repl len */
-                        " .byte 3b - 2b\n"             /* pad len */
-                        ".previous\n"
-                        ".section .altinstr_replacement,\"ax\"\n"
-                        "4: jmp %l[t_no]\n"
-                        "5:\n"
-                        ".previous\n"
-                        ".section .altinstructions,\"a\"\n"
-                        " .long 1b - .\n"              /* src offset */
-                        " .long 0\n"                   /* no replacement */
-                        " .word %P0\n"                 /* feature bit */
-                        " .byte 3b - 1b\n"             /* src len */
-                        " .byte 0\n"                   /* repl len */
-                        " .byte 0\n"                   /* pad len */
-                        ".previous\n"
-                        ".section .altinstr_aux,\"ax\"\n"
-                        "6:\n"
-                        " testb %[bitnum],%[cap_byte]\n"
-                        " jnz %l[t_yes]\n"
-                        " jmp %l[t_no]\n"
-                        ".previous\n"
-                        : : "i" (bit), "i" (X86_FEATURE_ALWAYS),
-                            [bitnum] "i" (1 << (bit & 7)),
-                            [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
-                        : : t_yes, t_no);
-       t_yes:
-               return true;
-       t_no:
-               return false;
+       asm_volatile_goto("1: jmp 6f\n"
+                "2:\n"
+                ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+                        "((5f-4f) - (2b-1b)),0x90\n"
+                "3:\n"
+                ".section .altinstructions,\"a\"\n"
+                " .long 1b - .\n"              /* src offset */
+                " .long 4f - .\n"              /* repl offset */
+                " .word %P[always]\n"          /* always replace */
+                " .byte 3b - 1b\n"             /* src len */
+                " .byte 5f - 4f\n"             /* repl len */
+                " .byte 3b - 2b\n"             /* pad len */
+                ".previous\n"
+                ".section .altinstr_replacement,\"ax\"\n"
+                "4: jmp %l[t_no]\n"
+                "5:\n"
+                ".previous\n"
+                ".section .altinstructions,\"a\"\n"
+                " .long 1b - .\n"              /* src offset */
+                " .long 0\n"                   /* no replacement */
+                " .word %P[feature]\n"         /* feature bit */
+                " .byte 3b - 1b\n"             /* src len */
+                " .byte 0\n"                   /* repl len */
+                " .byte 0\n"                   /* pad len */
+                ".previous\n"
+                ".section .altinstr_aux,\"ax\"\n"
+                "6:\n"
+                " testb %[bitnum],%[cap_byte]\n"
+                " jnz %l[t_yes]\n"
+                " jmp %l[t_no]\n"
+                ".previous\n"
+                : : [feature]  "i" (bit),
+                    [always]   "i" (X86_FEATURE_ALWAYS),
+                    [bitnum]   "i" (1 << (bit & 7)),
+                    [cap_byte] "m" (((const char *)boot_cpu_data.x86_capability)[bit >> 3])
+                : : t_yes, t_no);
+t_yes:
+       return true;
+t_no:
+       return false;
 }
 
 #define static_cpu_has(bit)                                    \
index 4d57894635f242da061e6a10acccaec70ae1dbc0..76b058533e473b10d99e7a1ee4b661b5b2b2b14d 100644 (file)
@@ -6,6 +6,7 @@
 #include <asm/alternative.h>
 #include <asm/alternative-asm.h>
 #include <asm/cpufeatures.h>
+#include <asm/msr-index.h>
 
 #ifdef __ASSEMBLY__
 
@@ -164,10 +165,15 @@ static inline void vmexit_fill_RSB(void)
 
 static inline void indirect_branch_prediction_barrier(void)
 {
-       alternative_input("",
-                         "call __ibp_barrier",
-                         X86_FEATURE_USE_IBPB,
-                         ASM_NO_INPUT_CLOBBER("eax", "ecx", "edx", "memory"));
+       asm volatile(ALTERNATIVE("",
+                                "movl %[msr], %%ecx\n\t"
+                                "movl %[val], %%eax\n\t"
+                                "movl $0, %%edx\n\t"
+                                "wrmsr",
+                                X86_FEATURE_USE_IBPB)
+                    : : [msr] "i" (MSR_IA32_PRED_CMD),
+                        [val] "i" (PRED_CMD_IBPB)
+                    : "eax", "ecx", "edx", "memory");
 }
 
 #endif /* __ASSEMBLY__ */
index 4baa6bceb2325e6dd056ca682e1eeeb435693a26..d652a38080659775ef145d089291bde353ffe97a 100644 (file)
@@ -52,10 +52,6 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
-#ifdef CONFIG_X86_MCE
-#define arch_unmap_kpfn arch_unmap_kpfn
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
index 892df375b6155a51f584760efb9f9e77c3f732e8..554841fab717aef09d2b5cc57410a6eed8c2df0c 100644 (file)
@@ -297,9 +297,9 @@ static inline void __flush_tlb_global(void)
 {
        PVOP_VCALL0(pv_mmu_ops.flush_tlb_kernel);
 }
-static inline void __flush_tlb_single(unsigned long addr)
+static inline void __flush_tlb_one_user(unsigned long addr)
 {
-       PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
+       PVOP_VCALL1(pv_mmu_ops.flush_tlb_one_user, addr);
 }
 
 static inline void flush_tlb_others(const struct cpumask *cpumask,
index 6ec54d01972dcf7d79e75bcfbecc84a1f2da80ea..f624f1f10316c248911585f757ea5bd257e98434 100644 (file)
@@ -217,7 +217,7 @@ struct pv_mmu_ops {
        /* TLB operations */
        void (*flush_tlb_user)(void);
        void (*flush_tlb_kernel)(void);
-       void (*flush_tlb_single)(unsigned long addr);
+       void (*flush_tlb_one_user)(unsigned long addr);
        void (*flush_tlb_others)(const struct cpumask *cpus,
                                 const struct flush_tlb_info *info);
 
index e67c0620aec2a268537b46d1da80ce6d0ef174a5..e55466760ff8e031433132eab676535e614e224f 100644 (file)
@@ -61,7 +61,7 @@ void paging_init(void);
 #define kpte_clear_flush(ptep, vaddr)          \
 do {                                           \
        pte_clear(&init_mm, (vaddr), (ptep));   \
-       __flush_tlb_one((vaddr));               \
+       __flush_tlb_one_kernel((vaddr));                \
 } while (0)
 
 #endif /* !__ASSEMBLY__ */
index 793bae7e7ce36bd36e728a8a6fe7f8e17b9920db..1bd9ed87606f45f5a22f2510bde9ba34029a0551 100644 (file)
@@ -91,7 +91,7 @@ struct cpuinfo_x86 {
        __u8                    x86;            /* CPU family */
        __u8                    x86_vendor;     /* CPU vendor */
        __u8                    x86_model;
-       __u8                    x86_mask;
+       __u8                    x86_stepping;
 #ifdef CONFIG_X86_64
        /* Number of 4K pages in DTLB/ITLB combined(in pages): */
        int                     x86_tlbsize;
@@ -109,7 +109,7 @@ struct cpuinfo_x86 {
        char                    x86_vendor_id[16];
        char                    x86_model_id[64];
        /* in KB - valid for CPUS which support this call: */
-       int                     x86_cache_size;
+       unsigned int            x86_cache_size;
        int                     x86_cache_alignment;    /* In bytes */
        /* Cache QoS architectural values: */
        int                     x86_cache_max_rmid;     /* max index */
@@ -977,7 +977,4 @@ bool xen_set_default_idle(void);
 
 void stop_this_cpu(void *dummy);
 void df_debug(struct pt_regs *regs, long error_code);
-
-void __ibp_barrier(void);
-
 #endif /* _ASM_X86_PROCESSOR_H */
index 461f53d27708ae8b80622753122c1a4927537ee2..a4189762b2667016e1e07a0942ff9eaecab9ba51 100644 (file)
@@ -129,6 +129,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 void cpu_disable_common(void);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
+void calculate_max_logical_packages(void);
 void native_smp_cpus_done(unsigned int max_cpus);
 void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
index 2b8f18ca58747ae40b515c2bf674f8faced147e8..84137c22fdfade9bc8224c317808b311ec6f3007 100644 (file)
@@ -140,7 +140,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 #else
 #define __flush_tlb() __native_flush_tlb()
 #define __flush_tlb_global() __native_flush_tlb_global()
-#define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+#define __flush_tlb_one_user(addr) __native_flush_tlb_one_user(addr)
 #endif
 
 static inline bool tlb_defer_switch_to_init_mm(void)
@@ -400,7 +400,7 @@ static inline void __native_flush_tlb_global(void)
 /*
  * flush one page in the user mapping
  */
-static inline void __native_flush_tlb_single(unsigned long addr)
+static inline void __native_flush_tlb_one_user(unsigned long addr)
 {
        u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
 
@@ -437,18 +437,31 @@ static inline void __flush_tlb_all(void)
 /*
  * flush one page in the kernel mapping
  */
-static inline void __flush_tlb_one(unsigned long addr)
+static inline void __flush_tlb_one_kernel(unsigned long addr)
 {
        count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
-       __flush_tlb_single(addr);
+
+       /*
+        * If PTI is off, then __flush_tlb_one_user() is just INVLPG or its
+        * paravirt equivalent.  Even with PCID, this is sufficient: we only
+        * use PCID if we also use global PTEs for the kernel mapping, and
+        * INVLPG flushes global translations across all address spaces.
+        *
+        * If PTI is on, then the kernel is mapped with non-global PTEs, and
+        * __flush_tlb_one_user() will flush the given address for the current
+        * kernel address space and for its usermode counterpart, but it does
+        * not flush it for other address spaces.
+        */
+       __flush_tlb_one_user(addr);
 
        if (!static_cpu_has(X86_FEATURE_PTI))
                return;
 
        /*
-        * __flush_tlb_single() will have cleared the TLB entry for this ASID,
-        * but since kernel space is replicated across all, we must also
-        * invalidate all others.
+        * See above.  We need to propagate the flush to all other address
+        * spaces.  In principle, we only need to propagate it to kernelmode
+        * address spaces, but the extra bookkeeping we would need is not
+        * worth it.
         */
        invalidate_other_asid();
 }
index 6db28f17ff2884e01122f2689b117e8ae63f9ec4..c88e0b127810f22b15b53eb150d11e9584201885 100644 (file)
@@ -235,7 +235,7 @@ int amd_cache_northbridges(void)
        if (boot_cpu_data.x86 == 0x10 &&
            boot_cpu_data.x86_model >= 0x8 &&
            (boot_cpu_data.x86_model > 0x9 ||
-            boot_cpu_data.x86_mask >= 0x1))
+            boot_cpu_data.x86_stepping >= 0x1))
                amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
 
        if (boot_cpu_data.x86 == 0x15)
index 25ddf02598d20a89cb1da2243aba687e6eee7657..b203af0855b57618fc398e29425ef96755c95552 100644 (file)
@@ -546,7 +546,7 @@ static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
 
 static u32 hsx_deadline_rev(void)
 {
-       switch (boot_cpu_data.x86_mask) {
+       switch (boot_cpu_data.x86_stepping) {
        case 0x02: return 0x3a; /* EP */
        case 0x04: return 0x0f; /* EX */
        }
@@ -556,7 +556,7 @@ static u32 hsx_deadline_rev(void)
 
 static u32 bdx_deadline_rev(void)
 {
-       switch (boot_cpu_data.x86_mask) {
+       switch (boot_cpu_data.x86_stepping) {
        case 0x02: return 0x00000011;
        case 0x03: return 0x0700000e;
        case 0x04: return 0x0f00000c;
@@ -568,7 +568,7 @@ static u32 bdx_deadline_rev(void)
 
 static u32 skx_deadline_rev(void)
 {
-       switch (boot_cpu_data.x86_mask) {
+       switch (boot_cpu_data.x86_stepping) {
        case 0x03: return 0x01000136;
        case 0x04: return 0x02000014;
        }
index 46b675aaf20b8a1f30f1eee2cf63717e299c5367..f11910b44638c84995848a5acdecbb296ddb5636 100644 (file)
@@ -1176,16 +1176,26 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
 
        uv_gre_table = gre;
        for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
+               unsigned long size = ((unsigned long)(gre->limit - lgre)
+                                       << UV_GAM_RANGE_SHFT);
+               int order = 0;
+               char suffix[] = " KMGTPE";
+
+               /* Stop at 'E': never index the NUL or past the array. */
+               while (size > 9999 && order < sizeof(suffix) - 2) {
+                       size /= 1024;
+                       order++;
+               }
+
                if (!index) {
                        pr_info("UV: GAM Range Table...\n");
                        pr_info("UV:  # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN");
                }
-               pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d   %04x  %02x %02x\n",
+               pr_info("UV: %2d: 0x%014lx-0x%014lx %5lu%c %3d   %04x  %02x %02x\n",
                        index++,
                        (unsigned long)lgre << UV_GAM_RANGE_SHFT,
                        (unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
-                       ((unsigned long)(gre->limit - lgre)) >>
-                               (30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
+                       size, suffix[order],
                        gre->type, gre->nasid, gre->sockid, gre->pnode);
 
                lgre = gre->limit;
index fa1261eefa16e73cedf27aadb878753be693f919..f91ba53e06c8b90f9d5557a2713896a1a50100f0 100644 (file)
@@ -18,7 +18,7 @@ void foo(void)
        OFFSET(CPUINFO_x86, cpuinfo_x86, x86);
        OFFSET(CPUINFO_x86_vendor, cpuinfo_x86, x86_vendor);
        OFFSET(CPUINFO_x86_model, cpuinfo_x86, x86_model);
-       OFFSET(CPUINFO_x86_mask, cpuinfo_x86, x86_mask);
+       OFFSET(CPUINFO_x86_stepping, cpuinfo_x86, x86_stepping);
        OFFSET(CPUINFO_cpuid_level, cpuinfo_x86, cpuid_level);
        OFFSET(CPUINFO_x86_capability, cpuinfo_x86, x86_capability);
        OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id);
index 5bddbdcbc4a3cf722cd960c032e8ec400369e17a..f0e6456ca7d3cd482893a7d1953aec5d79c4caad 100644 (file)
@@ -119,7 +119,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                return;
        }
 
-       if (c->x86_model == 6 && c->x86_mask == 1) {
+       if (c->x86_model == 6 && c->x86_stepping == 1) {
                const int K6_BUG_LOOP = 1000000;
                int n;
                void (*f_vide)(void);
@@ -149,7 +149,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
 
        /* K6 with old style WHCR */
        if (c->x86_model < 8 ||
-          (c->x86_model == 8 && c->x86_mask < 8)) {
+          (c->x86_model == 8 && c->x86_stepping < 8)) {
                /* We can only write allocate on the low 508Mb */
                if (mbytes > 508)
                        mbytes = 508;
@@ -168,7 +168,7 @@ static void init_amd_k6(struct cpuinfo_x86 *c)
                return;
        }
 
-       if ((c->x86_model == 8 && c->x86_mask > 7) ||
+       if ((c->x86_model == 8 && c->x86_stepping > 7) ||
             c->x86_model == 9 || c->x86_model == 13) {
                /* The more serious chips .. */
 
@@ -221,7 +221,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
         * As per AMD technical note 27212 0.2
         */
-       if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
+       if ((c->x86_model == 8 && c->x86_stepping >= 1) || (c->x86_model > 8)) {
                rdmsr(MSR_K7_CLK_CTL, l, h);
                if ((l & 0xfff00000) != 0x20000000) {
                        pr_info("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
@@ -241,12 +241,12 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         * but they are not certified as MP capable.
         */
        /* Athlon 660/661 is valid. */
-       if ((c->x86_model == 6) && ((c->x86_mask == 0) ||
-           (c->x86_mask == 1)))
+       if ((c->x86_model == 6) && ((c->x86_stepping == 0) ||
+           (c->x86_stepping == 1)))
                return;
 
        /* Duron 670 is valid */
-       if ((c->x86_model == 7) && (c->x86_mask == 0))
+       if ((c->x86_model == 7) && (c->x86_stepping == 0))
                return;
 
        /*
@@ -256,8 +256,8 @@ static void init_amd_k7(struct cpuinfo_x86 *c)
         * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for
         * more.
         */
-       if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
-           ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
+       if (((c->x86_model == 6) && (c->x86_stepping >= 2)) ||
+           ((c->x86_model == 7) && (c->x86_stepping >= 1)) ||
             (c->x86_model > 7))
                if (cpu_has(c, X86_FEATURE_MP))
                        return;
@@ -628,7 +628,7 @@ static void early_init_amd(struct cpuinfo_x86 *c)
        /*  Set MTRR capability flag if appropriate */
        if (c->x86 == 5)
                if (c->x86_model == 13 || c->x86_model == 9 ||
-                   (c->x86_model == 8 && c->x86_mask >= 8))
+                   (c->x86_model == 8 && c->x86_stepping >= 8))
                        set_cpu_cap(c, X86_FEATURE_K6_MTRR);
 #endif
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
@@ -795,7 +795,7 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
         * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
         * all up to and including B1.
         */
-       if (c->x86_model <= 1 && c->x86_mask <= 1)
+       if (c->x86_model <= 1 && c->x86_stepping <= 1)
                set_cpu_cap(c, X86_FEATURE_CPB);
 }
 
@@ -906,11 +906,11 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
        /* AMD errata T13 (order #21922) */
        if ((c->x86 == 6)) {
                /* Duron Rev A0 */
-               if (c->x86_model == 3 && c->x86_mask == 0)
+               if (c->x86_model == 3 && c->x86_stepping == 0)
                        size = 64;
                /* Tbird rev A1/A2 */
                if (c->x86_model == 4 &&
-                       (c->x86_mask == 0 || c->x86_mask == 1))
+                       (c->x86_stepping == 0 || c->x86_stepping == 1))
                        size = 256;
        }
        return size;
@@ -1047,7 +1047,7 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
        }
 
        /* OSVW unavailable or ID unknown, match family-model-stepping range */
-       ms = (cpu->x86_model << 4) | cpu->x86_mask;
+       ms = (cpu->x86_model << 4) | cpu->x86_stepping;
        while ((range = *erratum++))
                if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
                    (ms >= AMD_MODEL_RANGE_START(range)) &&
index 71949bf2de5ad378e8184566010e4aaa6aa2307d..d71c8b54b696d4593ffb15ff894468ad3e524a50 100644 (file)
@@ -162,8 +162,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
        if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
                return SPECTRE_V2_CMD_NONE;
        else {
-               ret = cmdline_find_option(boot_command_line, "spectre_v2", arg,
-                                         sizeof(arg));
+               ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
                if (ret < 0)
                        return SPECTRE_V2_CMD_AUTO;
 
@@ -175,8 +174,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
                }
 
                if (i >= ARRAY_SIZE(mitigation_options)) {
-                       pr_err("unknown option (%s). Switching to AUTO select\n",
-                              mitigation_options[i].option);
+                       pr_err("unknown option (%s). Switching to AUTO select\n", arg);
                        return SPECTRE_V2_CMD_AUTO;
                }
        }
@@ -185,8 +183,7 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
             cmd == SPECTRE_V2_CMD_RETPOLINE_AMD ||
             cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) &&
            !IS_ENABLED(CONFIG_RETPOLINE)) {
-               pr_err("%s selected but not compiled in. Switching to AUTO select\n",
-                      mitigation_options[i].option);
+               pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option);
                return SPECTRE_V2_CMD_AUTO;
        }
 
@@ -256,14 +253,14 @@ static void __init spectre_v2_select_mitigation(void)
                        goto retpoline_auto;
                break;
        }
-       pr_err("kernel not compiled with retpoline; no mitigation available!");
+       pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!\n");
        return;
 
 retpoline_auto:
        if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
        retpoline_amd:
                if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) {
-                       pr_err("LFENCE not serializing. Switching to generic retpoline\n");
+                       pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n");
                        goto retpoline_generic;
                }
                mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD :
@@ -281,7 +278,7 @@ retpoline_auto:
        pr_info("%s\n", spectre_v2_strings[mode]);
 
        /*
-        * If neither SMEP or KPTI are available, there is a risk of
+        * If neither SMEP nor PTI are available, there is a risk of
         * hitting userspace addresses in the RSB after a context switch
         * from a shallow call stack to a deeper one. To prevent this fill
         * the entire RSB, even when using IBRS.
@@ -295,21 +292,20 @@ retpoline_auto:
        if ((!boot_cpu_has(X86_FEATURE_PTI) &&
             !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
                setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
-               pr_info("Filling RSB on context switch\n");
+               pr_info("Spectre v2 mitigation: Filling RSB on context switch\n");
        }
 
        /* Initialize Indirect Branch Prediction Barrier if supported */
        if (boot_cpu_has(X86_FEATURE_IBPB)) {
                setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
-               pr_info("Enabling Indirect Branch Prediction Barrier\n");
+               pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n");
        }
 }
 
 #undef pr_fmt
 
 #ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev,
-                         struct device_attribute *attr, char *buf)
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
 {
        if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
                return sprintf(buf, "Not affected\n");
@@ -318,16 +314,14 @@ ssize_t cpu_show_meltdown(struct device *dev,
        return sprintf(buf, "Vulnerable\n");
 }
 
-ssize_t cpu_show_spectre_v1(struct device *dev,
-                           struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
 {
        if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
                return sprintf(buf, "Not affected\n");
        return sprintf(buf, "Mitigation: __user pointer sanitization\n");
 }
 
-ssize_t cpu_show_spectre_v2(struct device *dev,
-                           struct device_attribute *attr, char *buf)
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
 {
        if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
                return sprintf(buf, "Not affected\n");
@@ -337,9 +331,3 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
                       spectre_v2_module_string());
 }
 #endif
-
-void __ibp_barrier(void)
-{
-       __wrmsr(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, 0);
-}
-EXPORT_SYMBOL_GPL(__ibp_barrier);
index c578cd29c2d2c47bd8c03268bc8809c28785d43c..e5ec0f11c0de7c06a0975d852822b2033d914185 100644 (file)
@@ -140,7 +140,7 @@ static void init_centaur(struct cpuinfo_x86 *c)
                        clear_cpu_cap(c, X86_FEATURE_TSC);
                        break;
                case 8:
-                       switch (c->x86_mask) {
+                       switch (c->x86_stepping) {
                        default:
                        name = "2";
                                break;
@@ -215,7 +215,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size)
         *  - Note, it seems this may only be in engineering samples.
         */
        if ((c->x86 == 6) && (c->x86_model == 9) &&
-                               (c->x86_mask == 1) && (size == 65))
+                               (c->x86_stepping == 1) && (size == 65))
                size -= 1;
        return size;
 }
index d63f4b5706e4d76271da40fe14c6ef4ba41fe999..824aee0117bb5402d52fb5c8958e98b0bebfdf0c 100644 (file)
@@ -731,7 +731,7 @@ void cpu_detect(struct cpuinfo_x86 *c)
                cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
                c->x86          = x86_family(tfms);
                c->x86_model    = x86_model(tfms);
-               c->x86_mask     = x86_stepping(tfms);
+               c->x86_stepping = x86_stepping(tfms);
 
                if (cap0 & (1<<19)) {
                        c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
@@ -1184,9 +1184,9 @@ static void identify_cpu(struct cpuinfo_x86 *c)
        int i;
 
        c->loops_per_jiffy = loops_per_jiffy;
-       c->x86_cache_size = -1;
+       c->x86_cache_size = 0;
        c->x86_vendor = X86_VENDOR_UNKNOWN;
-       c->x86_model = c->x86_mask = 0; /* So far unknown... */
+       c->x86_model = c->x86_stepping = 0;     /* So far unknown... */
        c->x86_vendor_id[0] = '\0'; /* Unset */
        c->x86_model_id[0] = '\0';  /* Unset */
        c->x86_max_cores = 1;
@@ -1378,8 +1378,8 @@ void print_cpu_info(struct cpuinfo_x86 *c)
 
        pr_cont(" (family: 0x%x, model: 0x%x", c->x86, c->x86_model);
 
-       if (c->x86_mask || c->cpuid_level >= 0)
-               pr_cont(", stepping: 0x%x)\n", c->x86_mask);
+       if (c->x86_stepping || c->cpuid_level >= 0)
+               pr_cont(", stepping: 0x%x)\n", c->x86_stepping);
        else
                pr_cont(")\n");
 }
index 6b4bb335641f3f039c7cff50190ad38b5cf9de66..8949b7ae6d92536c1bbff659d7463588d2bdfb06 100644 (file)
@@ -215,7 +215,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
 
        /* common case step number/rev -- exceptions handled below */
        c->x86_model = (dir1 >> 4) + 1;
-       c->x86_mask = dir1 & 0xf;
+       c->x86_stepping = dir1 & 0xf;
 
        /* Now cook; the original recipe is by Channing Corn, from Cyrix.
         * We do the same thing for each generation: we work out
index 319bf989fad1e1f3d7ed2234090a51c55a0067c6..d19e903214b403289aaf304eba85cc585c100c5e 100644 (file)
@@ -116,14 +116,13 @@ struct sku_microcode {
        u32 microcode;
 };
 static const struct sku_microcode spectre_bad_microcodes[] = {
-       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0B,   0x84 },
-       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0A,   0x84 },
-       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x09,   0x84 },
-       { INTEL_FAM6_KABYLAKE_MOBILE,   0x0A,   0x84 },
-       { INTEL_FAM6_KABYLAKE_MOBILE,   0x09,   0x84 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0B,   0x80 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x0A,   0x80 },
+       { INTEL_FAM6_KABYLAKE_DESKTOP,  0x09,   0x80 },
+       { INTEL_FAM6_KABYLAKE_MOBILE,   0x0A,   0x80 },
+       { INTEL_FAM6_KABYLAKE_MOBILE,   0x09,   0x80 },
        { INTEL_FAM6_SKYLAKE_X,         0x03,   0x0100013e },
        { INTEL_FAM6_SKYLAKE_X,         0x04,   0x0200003c },
-       { INTEL_FAM6_SKYLAKE_MOBILE,    0x03,   0xc2 },
        { INTEL_FAM6_SKYLAKE_DESKTOP,   0x03,   0xc2 },
        { INTEL_FAM6_BROADWELL_CORE,    0x04,   0x28 },
        { INTEL_FAM6_BROADWELL_GT3E,    0x01,   0x1b },
@@ -136,8 +135,6 @@ static const struct sku_microcode spectre_bad_microcodes[] = {
        { INTEL_FAM6_HASWELL_X,         0x02,   0x3b },
        { INTEL_FAM6_HASWELL_X,         0x04,   0x10 },
        { INTEL_FAM6_IVYBRIDGE_X,       0x04,   0x42a },
-       /* Updated in the 20180108 release; blacklist until we know otherwise */
-       { INTEL_FAM6_ATOM_GEMINI_LAKE,  0x01,   0x22 },
        /* Observed in the wild */
        { INTEL_FAM6_SANDYBRIDGE_X,     0x06,   0x61b },
        { INTEL_FAM6_SANDYBRIDGE_X,     0x07,   0x712 },
@@ -149,7 +146,7 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
 
        for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
                if (c->x86_model == spectre_bad_microcodes[i].model &&
-                   c->x86_mask == spectre_bad_microcodes[i].stepping)
+                   c->x86_stepping == spectre_bad_microcodes[i].stepping)
                        return (c->microcode <= spectre_bad_microcodes[i].microcode);
        }
        return false;
@@ -196,7 +193,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
         * need the microcode to have already been loaded... so if it is
         * not, recommend a BIOS update and disable large pages.
         */
-       if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2 &&
+       if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
            c->microcode < 0x20e) {
                pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
                clear_cpu_cap(c, X86_FEATURE_PSE);
@@ -212,7 +209,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
 
        /* CPUID workaround for 0F33/0F34 CPU */
        if (c->x86 == 0xF && c->x86_model == 0x3
-           && (c->x86_mask == 0x3 || c->x86_mask == 0x4))
+           && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4))
                c->x86_phys_bits = 36;
 
        /*
@@ -310,7 +307,7 @@ int ppro_with_ram_bug(void)
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
            boot_cpu_data.x86 == 6 &&
            boot_cpu_data.x86_model == 1 &&
-           boot_cpu_data.x86_mask < 8) {
+           boot_cpu_data.x86_stepping < 8) {
                pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n");
                return 1;
        }
@@ -327,7 +324,7 @@ static void intel_smp_check(struct cpuinfo_x86 *c)
         * Mask B, Pentium, but not Pentium MMX
         */
        if (c->x86 == 5 &&
-           c->x86_mask >= 1 && c->x86_mask <= 4 &&
+           c->x86_stepping >= 1 && c->x86_stepping <= 4 &&
            c->x86_model <= 3) {
                /*
                 * Remember we have B step Pentia with bugs
@@ -370,7 +367,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
         * model 3 mask 3
         */
-       if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
+       if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633)
                clear_cpu_cap(c, X86_FEATURE_SEP);
 
        /*
@@ -388,7 +385,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * P4 Xeon erratum 037 workaround.
         * Hardware prefetcher may cause stale data to be loaded into the cache.
         */
-       if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
+       if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) {
                if (msr_set_bit(MSR_IA32_MISC_ENABLE,
                                MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) {
                        pr_info("CPU: C0 stepping P4 Xeon detected.\n");
@@ -403,7 +400,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
         * Specification Update").
         */
        if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
-           (c->x86_mask < 0x6 || c->x86_mask == 0xb))
+           (c->x86_stepping < 0x6 || c->x86_stepping == 0xb))
                set_cpu_bug(c, X86_BUG_11AP);
 
 
@@ -650,7 +647,7 @@ static void init_intel(struct cpuinfo_x86 *c)
                case 6:
                        if (l2 == 128)
                                p = "Celeron (Mendocino)";
-                       else if (c->x86_mask == 0 || c->x86_mask == 5)
+                       else if (c->x86_stepping == 0 || c->x86_stepping == 5)
                                p = "Celeron-A";
                        break;
 
index 410629f10ad377176787b1b93ddb36625de2be36..589b948e6e01f01d7388cca456fbecd6019dbca4 100644 (file)
@@ -819,7 +819,7 @@ static __init void rdt_quirks(void)
                        cache_alloc_hsw_probe();
                break;
        case INTEL_FAM6_SKYLAKE_X:
-               if (boot_cpu_data.x86_mask <= 4)
+               if (boot_cpu_data.x86_stepping <= 4)
                        set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat");
        }
 }
index aa0d5df9dc60e710b22ab7172f0e5fd6e05db2c9..e956eb26706191d27447bc9feec2e9fbde5310c7 100644 (file)
@@ -115,4 +115,19 @@ static inline void mce_unregister_injector_chain(struct notifier_block *nb)        { }
 
 extern struct mca_config mca_cfg;
 
+#ifndef CONFIG_X86_64
+/*
+ * On 32-bit systems it would be difficult to safely unmap a poison page
+ * from the kernel 1:1 map because there are no non-canonical addresses that
+ * we can use to refer to the address without risking a speculative access.
+ * However, this isn't much of an issue because:
+ * 1) Few unmappable pages are in the 1:1 map. Most are in HIGHMEM, which
+ *    is only mapped into the kernel as needed.
+ * 2) Few people would run a 32-bit kernel on a machine that supports
+ *    recoverable errors because they have too much memory to boot 32-bit.
+ */
+static inline void mce_unmap_kpfn(unsigned long pfn) {}
+#define mce_unmap_kpfn mce_unmap_kpfn
+#endif
+
 #endif /* __X86_MCE_INTERNAL_H__ */
index 3a8e88a611ebf99d70c2cacf2d866343f01a4a1a..8ff94d1e2dce54e87cc72c63812365d610476ec8 100644 (file)
@@ -105,6 +105,10 @@ static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
 
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn);
+#endif
+
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
@@ -234,7 +238,7 @@ static void __print_mce(struct mce *m)
                        m->cs, m->ip);
 
                if (m->cs == __KERNEL_CS)
-                       pr_cont("{%pS}", (void *)m->ip);
+                       pr_cont("{%pS}", (void *)(unsigned long)m->ip);
                pr_cont("\n");
        }
 
@@ -590,7 +594,8 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
 
        if (mce_usable_address(mce) && (mce->severity == MCE_AO_SEVERITY)) {
                pfn = mce->addr >> PAGE_SHIFT;
-               memory_failure(pfn, 0);
+               if (!memory_failure(pfn, 0))
+                       mce_unmap_kpfn(pfn);
        }
 
        return NOTIFY_OK;
@@ -1057,12 +1062,13 @@ static int do_memory_failure(struct mce *m)
        ret = memory_failure(m->addr >> PAGE_SHIFT, flags);
        if (ret)
                pr_err("Memory error not recovered");
+       else
+               mce_unmap_kpfn(m->addr >> PAGE_SHIFT);
        return ret;
 }
 
-#if defined(arch_unmap_kpfn) && defined(CONFIG_MEMORY_FAILURE)
-
-void arch_unmap_kpfn(unsigned long pfn)
+#ifndef mce_unmap_kpfn
+static void mce_unmap_kpfn(unsigned long pfn)
 {
        unsigned long decoy_addr;
 
@@ -1073,7 +1079,7 @@ void arch_unmap_kpfn(unsigned long pfn)
         * We would like to just call:
         *      set_memory_np((unsigned long)pfn_to_kaddr(pfn), 1);
         * but doing that would radically increase the odds of a
-        * speculative access to the posion page because we'd have
+        * speculative access to the poison page because we'd have
         * the virtual address of the kernel 1:1 mapping sitting
         * around in registers.
         * Instead we get tricky.  We create a non-canonical address
@@ -1098,7 +1104,6 @@ void arch_unmap_kpfn(unsigned long pfn)
 
        if (set_memory_np(decoy_addr, 1))
                pr_warn("Could not invalidate pfn=0x%lx from 1:1 map\n", pfn);
-
 }
 #endif
 
index f7c55b0e753ad038332307b2c7347abd1dcbc670..a15db2b4e0d66a8b5c4d2468359eeafb85401151 100644 (file)
@@ -921,7 +921,7 @@ static bool is_blacklisted(unsigned int cpu)
         */
        if (c->x86 == 6 &&
            c->x86_model == INTEL_FAM6_BROADWELL_X &&
-           c->x86_mask == 0x01 &&
+           c->x86_stepping == 0x01 &&
            llc_size_per_core > 2621440 &&
            c->microcode < 0x0b000021) {
                pr_err_once("Erratum BDF90: late loading with revision < 0x0b000021 (0x%x) disabled.\n", c->microcode);
@@ -944,7 +944,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
                return UCODE_NFOUND;
 
        sprintf(name, "intel-ucode/%02x-%02x-%02x",
-               c->x86, c->x86_model, c->x86_mask);
+               c->x86, c->x86_model, c->x86_stepping);
 
        if (request_firmware_direct(&firmware, name, device)) {
                pr_debug("data file %s load failed\n", name);
@@ -982,7 +982,7 @@ static struct microcode_ops microcode_intel_ops = {
 
 static int __init calc_llc_size_per_core(struct cpuinfo_x86 *c)
 {
-       u64 llc_size = c->x86_cache_size * 1024;
+       u64 llc_size = c->x86_cache_size * 1024ULL;
 
        do_div(llc_size, c->x86_max_cores);
 
index fdc55215d44d08b8c170767f94e68c4e60c09079..e12ee86906c6250faa05b13ae9bb9c3dc545558f 100644 (file)
@@ -859,7 +859,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size,
         */
        if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
            boot_cpu_data.x86_model == 1 &&
-           boot_cpu_data.x86_mask <= 7) {
+           boot_cpu_data.x86_stepping <= 7) {
                if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
                        pr_warn("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
                        return -EINVAL;
index 40d5a8a752125ed5d26a7605d5eabad572879bfc..7468de4290873ad4664a5575e6cc0f4d74a26ae6 100644 (file)
@@ -711,8 +711,8 @@ void __init mtrr_bp_init(void)
                        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
                            boot_cpu_data.x86 == 0xF &&
                            boot_cpu_data.x86_model == 0x3 &&
-                           (boot_cpu_data.x86_mask == 0x3 ||
-                            boot_cpu_data.x86_mask == 0x4))
+                           (boot_cpu_data.x86_stepping == 0x3 ||
+                            boot_cpu_data.x86_stepping == 0x4))
                                phys_addr = 36;
 
                        size_or_mask = SIZE_OR_MASK_BITS(phys_addr);
index e7ecedafa1c8f7b033eccbdcf24198089933b386..2c8522a39ed5dbc388bada821ed144f2435adac2 100644 (file)
@@ -72,8 +72,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
                   c->x86_model,
                   c->x86_model_id[0] ? c->x86_model_id : "unknown");
 
-       if (c->x86_mask || c->cpuid_level >= 0)
-               seq_printf(m, "stepping\t: %d\n", c->x86_mask);
+       if (c->x86_stepping || c->cpuid_level >= 0)
+               seq_printf(m, "stepping\t: %d\n", c->x86_stepping);
        else
                seq_puts(m, "stepping\t: unknown\n");
        if (c->microcode)
@@ -91,8 +91,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        }
 
        /* Cache size */
-       if (c->x86_cache_size >= 0)
-               seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
+       if (c->x86_cache_size)
+               seq_printf(m, "cache size\t: %u KB\n", c->x86_cache_size);
 
        show_cpuinfo_core(m, c, cpu);
        show_cpuinfo_misc(m, c);
index c29020907886a32d1ca40e9beb858c32dfdefd0f..b59e4fb40fd9986c0cc6b629b4c7a3a18a6d23b4 100644 (file)
@@ -37,7 +37,7 @@
 #define X86            new_cpu_data+CPUINFO_x86
 #define X86_VENDOR     new_cpu_data+CPUINFO_x86_vendor
 #define X86_MODEL      new_cpu_data+CPUINFO_x86_model
-#define X86_MASK       new_cpu_data+CPUINFO_x86_mask
+#define X86_STEPPING   new_cpu_data+CPUINFO_x86_stepping
 #define X86_HARD_MATH  new_cpu_data+CPUINFO_hard_math
 #define X86_CPUID      new_cpu_data+CPUINFO_cpuid_level
 #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
@@ -332,7 +332,7 @@ ENTRY(startup_32_smp)
        shrb $4,%al
        movb %al,X86_MODEL
        andb $0x0f,%cl          # mask mask revision
-       movb %cl,X86_MASK
+       movb %cl,X86_STEPPING
        movl %edx,X86_CAPABILITY
 
 .Lis486:
index 27d0a1712663673ac9993a6ddd055cb075b265fa..f1c5eb99d445407a9fc134e76a8010d17a61d780 100644 (file)
@@ -410,7 +410,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
        processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
        processor.cpuflag = CPU_ENABLED;
        processor.cpufeature = (boot_cpu_data.x86 << 8) |
-           (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+           (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_stepping;
        processor.featureflag = boot_cpu_data.x86_capability[CPUID_1_EDX];
        processor.reserved[0] = 0;
        processor.reserved[1] = 0;
index 041096bdef860d356d58873e3e0483384ee98301..99dc79e76bdc5497c8e07c6ee32e74ffe04ff492 100644 (file)
--- a/