Merge branch 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 25 Mar 2018 17:36:02 +0000 (07:36 -1000)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 25 Mar 2018 17:36:02 +0000 (07:36 -1000)
Pull x86 and PTI fixes from Ingo Molnar:
 "Misc fixes:

   - fix freeing of the EFI pagetables

   - fix vsyscall pagetable setting on Xen PV guests

   - remove ancient CONFIG_X86_PPRO_FENCE=y - x86 is TSO again

   - fix two incompatibilities with development versions of binutils (ld)

   - clean up breakpoint handling

   - fix an x86 self-test"

* 'x86-pti-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/entry/64: Don't use IST entry for #BP stack
  x86/efi: Free efi_pgd with free_pages()
  x86/vsyscall/64: Use proper accessor to update P4D entry
  x86/cpu: Remove the CONFIG_X86_PPRO_FENCE=y quirk
  x86/boot/64: Verify alignment of the LOAD segment
  x86/build/64: Force the linker to use 2MB page size
  selftests/x86/ptrace_syscall: Fix for yet more glibc interference

14 files changed:
arch/x86/Kconfig.cpu
arch/x86/Makefile
arch/x86/boot/compressed/misc.c
arch/x86/entry/entry_64.S
arch/x86/entry/vdso/vdso32/vclock_gettime.c
arch/x86/entry/vsyscall/vsyscall_64.c
arch/x86/include/asm/barrier.h
arch/x86/include/asm/io.h
arch/x86/kernel/idt.c
arch/x86/kernel/pci-nommu.c
arch/x86/kernel/traps.c
arch/x86/platform/efi/efi_64.c
arch/x86/um/asm/barrier.h
tools/testing/selftests/x86/ptrace_syscall.c

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8b8d2297d4867b06acaecf31e3be7124888f4d79..638411f22267aa34bd6ddda10eeccded1a1f6b48 100644
@@ -315,19 +315,6 @@ config X86_L1_CACHE_SHIFT
        default "4" if MELAN || M486 || MGEODEGX1
        default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 
-config X86_PPRO_FENCE
-       bool "PentiumPro memory ordering errata workaround"
-       depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
-       ---help---
-         Old PentiumPro multiprocessor systems had errata that could cause
-         memory operations to violate the x86 ordering standard in rare cases.
-         Enabling this option will attempt to work around some (but not all)
-         occurrences of this problem, at the cost of much heavier spinlock and
-         memory barrier operations.
-
-         If unsure, say n here. Even distro kernels should think twice before
-         enabling this: there are few systems, and an unlikely bug.
-
 config X86_F00F_BUG
        def_bool y
        depends on M586MMX || M586TSC || M586 || M486
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 498c1b8123006add6ad685a1fea6239208509810..1c4d012550ec5fab1e2ae26b9cf8a67168da75fd 100644
@@ -223,6 +223,15 @@ KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
 
 LDFLAGS := -m elf_$(UTS_MACHINE)
 
+#
+# The 64-bit kernel must be aligned to 2MB.  Pass -z max-page-size=0x200000 to
+# the linker to force 2MB page size regardless of the default page size used
+# by the linker.
+#
+ifdef CONFIG_X86_64
+LDFLAGS += $(call ld-option, -z max-page-size=0x200000)
+endif
+
 # Speed up the build
 KBUILD_CFLAGS += -pipe
 # Workaround for a gcc prelease that unfortunately was shipped in a suse release
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 98761a1576ceb5c21b2d8c7e98c1217fd48abb26..252fee32081662ce3bf21d71158969afc9c596b3 100644
@@ -309,6 +309,10 @@ static void parse_elf(void *output)
 
                switch (phdr->p_type) {
                case PT_LOAD:
+#ifdef CONFIG_X86_64
+                       if ((phdr->p_align % 0x200000) != 0)
+                               error("Alignment of LOAD segment isn't multiple of 2MB");
+#endif
 #ifdef CONFIG_RELOCATABLE
                        dest = output;
                        dest += (phdr->p_paddr - LOAD_PHYSICAL_ADDR);
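
Together with the Makefile change above, which forces -z max-page-size=0x200000, this guards against linkers that emit LOAD segments with a smaller alignment. The same test can be reproduced from userspace against any built image. A minimal, hypothetical checker (64-bit ELF assumed, error handling trimmed) that walks the program headers the way parse_elf() does:

    /*
     * check_align.c - hypothetical userspace checker mirroring the
     * parse_elf() test above: flag PT_LOAD segments whose p_align is
     * not a multiple of 2MB. 64-bit ELF files assumed.
     */
    #include <elf.h>
    #include <stdio.h>
    #include <string.h>

    int main(int argc, char **argv)
    {
            FILE *f;
            Elf64_Ehdr ehdr;
            int i;

            if (argc != 2) {
                    fprintf(stderr, "usage: %s <elf-file>\n", argv[0]);
                    return 1;
            }
            f = fopen(argv[1], "rb");
            if (!f) {
                    perror("fopen");
                    return 1;
            }
            if (fread(&ehdr, sizeof(ehdr), 1, f) != 1 ||
                memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0 ||
                ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
                    fprintf(stderr, "not a 64-bit ELF file\n");
                    return 1;
            }
            /* Walk the program headers just like the kernel's parse_elf(). */
            for (i = 0; i < ehdr.e_phnum; i++) {
                    Elf64_Phdr phdr;

                    fseek(f, ehdr.e_phoff + (long)i * ehdr.e_phentsize, SEEK_SET);
                    if (fread(&phdr, sizeof(phdr), 1, f) != 1)
                            break;
                    if (phdr.p_type == PT_LOAD && (phdr.p_align % 0x200000) != 0)
                            printf("LOAD %d: p_align 0x%llx not 2MB-aligned\n",
                                   i, (unsigned long long)phdr.p_align);
            }
            fclose(f);
            return 0;
    }

Build with "gcc -o check_align check_align.c" and point it at a vmlinux produced by a suspect binutils version.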
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 805f52703ee3377f88180eb3467a34bf2bc40e87..18ed349b4f8379257daf9b31861912df9b411fec 100644
@@ -1138,7 +1138,7 @@ apicinterrupt3 HYPERV_REENLIGHTENMENT_VECTOR \
 #endif /* CONFIG_HYPERV */
 
 idtentry debug                 do_debug                has_error_code=0        paranoid=1 shift_ist=DEBUG_STACK
-idtentry int3                  do_int3                 has_error_code=0        paranoid=1 shift_ist=DEBUG_STACK
+idtentry int3                  do_int3                 has_error_code=0
 idtentry stack_segment         do_stack_segment        has_error_code=1
 
 #ifdef CONFIG_XEN
diff --git a/arch/x86/entry/vdso/vdso32/vclock_gettime.c b/arch/x86/entry/vdso/vdso32/vclock_gettime.c
index 7780bbfb06ef2da78c6a82d4978ac467899873d2..9242b28418d58d11373a81d6aa0403f8eddae2b4 100644
@@ -5,8 +5,6 @@
 #undef CONFIG_OPTIMIZE_INLINING
 #endif
 
-#undef CONFIG_X86_PPRO_FENCE
-
 #ifdef CONFIG_X86_64
 
 /*
diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
index 8560ef68a9d631163934a40415df65d2a495f2bc..317be365bce3a459b9d2b60dad6ad864868b1366 100644
@@ -347,7 +347,7 @@ void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
        set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
        p4d = p4d_offset(pgd, VSYSCALL_ADDR);
 #if CONFIG_PGTABLE_LEVELS >= 5
-       p4d->p4d |= _PAGE_USER;
+       set_p4d(p4d, __p4d(p4d_val(*p4d) | _PAGE_USER));
 #endif
        pud = pud_offset(p4d, VSYSCALL_ADDR);
        set_pud(pud, __pud(pud_val(*pud) | _PAGE_USER));
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index e1259f043ae999fa21e1f998431ab12cd73a11ea..042b5e892ed1063769b253bdf35e31171eb55c4d 100644
@@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \
                                           "lfence", X86_FEATURE_LFENCE_RDTSC)
 
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb()      rmb()
-#else
 #define dma_rmb()      barrier()
-#endif
 #define dma_wmb()      barrier()
 
 #ifdef CONFIG_X86_32
@@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 #define __smp_wmb()    barrier()
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
 
-#if defined(CONFIG_X86_PPRO_FENCE)
-
-/*
- * For this option x86 doesn't have a strong TSO memory
- * model and we should fall back to full barriers.
- */
-
-#define __smp_store_release(p, v)                                      \
-do {                                                                   \
-       compiletime_assert_atomic_type(*p);                             \
-       __smp_mb();                                                     \
-       WRITE_ONCE(*p, v);                                              \
-} while (0)
-
-#define __smp_load_acquire(p)                                          \
-({                                                                     \
-       typeof(*p) ___p1 = READ_ONCE(*p);                               \
-       compiletime_assert_atomic_type(*p);                             \
-       __smp_mb();                                                     \
-       ___p1;                                                          \
-})
-
-#else /* regular x86 TSO memory ordering */
-
 #define __smp_store_release(p, v)                                      \
 do {                                                                   \
        compiletime_assert_atomic_type(*p);                             \
@@ -107,8 +79,6 @@ do {                                                                 \
        ___p1;                                                          \
 })
 
-#endif
-
 /* Atomic operations are already serializing on x86 */
 #define __smp_mb__before_atomic()      barrier()
 #define __smp_mb__after_atomic()       barrier()
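
With the quirk gone, the TSO definitions of __smp_store_release()/__smp_load_acquire() are the only ones left: a compiler barrier around a plain store or load, with no fence instruction. The same property is visible from userspace with C11 atomics; an illustrative sketch (not kernel code) where, on x86-64, both the release store and the acquire load compile to ordinary mov instructions:

    /*
     * tso_demo.c - illustrative only: release/acquire message passing.
     * On x86-64 (TSO) both the release store and the acquire load
     * compile to plain mov; only compiler reordering is restrained,
     * which is exactly what the surviving #defines above rely on.
     */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static int payload;          /* plain data, published via 'ready' */
    static atomic_int ready;     /* zero-initialized flag */

    static void *producer(void *unused)
    {
            payload = 42;                                            /* 1: write data   */
            atomic_store_explicit(&ready, 1, memory_order_release);  /* 2: publish flag */
            return NULL;
    }

    static void *consumer(void *unused)
    {
            while (!atomic_load_explicit(&ready, memory_order_acquire))
                    ;                                /* spin until the flag is visible */
            printf("payload = %d\n", payload);       /* acquire guarantees we see 42 */
            return NULL;
    }

    int main(void)
    {
            pthread_t p, c;

            pthread_create(&c, NULL, consumer, NULL);
            pthread_create(&p, NULL, producer, NULL);
            pthread_join(p, NULL);
            pthread_join(c, NULL);
            return 0;
    }

Build with "gcc -O2 -pthread tso_demo.c"; inspecting the generated assembly shows no mfence/lfence on x86-64.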
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 95e948627fd04878883041543c3b5f13703ceaa9..f6e5b9375d8c324644e1f8ddb221fa625e063f3b 100644
@@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void);
  */
 #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET))
 
-/*
- *     Cache management
- *
- *     This needed for two cases
- *     1. Out of order aware processors
- *     2. Accidentally out of order processors (PPro errata #51)
- */
-
-static inline void flush_write_buffers(void)
-{
-#if defined(CONFIG_X86_PPRO_FENCE)
-       asm volatile("lock; addl $0,0(%%esp)": : :"memory");
-#endif
-}
-
 #endif /* __KERNEL__ */
 
 extern void native_io_delay(void);
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 56d99be3706a23b600a50d2ae366b3a8fde251ed..50bee5fe114013622ee858e4a3b7b16d1ffa8a05 100644
@@ -160,7 +160,6 @@ static const __initconst struct idt_data early_pf_idts[] = {
  */
 static const __initconst struct idt_data dbg_idts[] = {
        INTG(X86_TRAP_DB,       debug),
-       INTG(X86_TRAP_BP,       int3),
 };
 #endif
 
@@ -183,7 +182,6 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
 static const __initconst struct idt_data ist_idts[] = {
        ISTG(X86_TRAP_DB,       debug,          DEBUG_STACK),
        ISTG(X86_TRAP_NMI,      nmi,            NMI_STACK),
-       SISTG(X86_TRAP_BP,      int3,           DEBUG_STACK),
        ISTG(X86_TRAP_DF,       double_fault,   DOUBLEFAULT_STACK),
 #ifdef CONFIG_X86_MCE
        ISTG(X86_TRAP_MC,       &machine_check, MCE_STACK),
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c
index 618285e475c622bc9f0a9fa8356794ed70da8e02..ac7ea3a8242fee683916cba35854655affaf0e45 100644
@@ -37,7 +37,6 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
        WARN_ON(size == 0);
        if (!check_addr("map_single", dev, bus, size))
                return NOMMU_MAPPING_ERROR;
-       flush_write_buffers();
        return bus;
 }
 
@@ -72,25 +71,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
                        return 0;
                s->dma_length = s->length;
        }
-       flush_write_buffers();
        return nents;
 }
 
-static void nommu_sync_single_for_device(struct device *dev,
-                       dma_addr_t addr, size_t size,
-                       enum dma_data_direction dir)
-{
-       flush_write_buffers();
-}
-
-
-static void nommu_sync_sg_for_device(struct device *dev,
-                       struct scatterlist *sg, int nelems,
-                       enum dma_data_direction dir)
-{
-       flush_write_buffers();
-}
-
 static int nommu_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
        return dma_addr == NOMMU_MAPPING_ERROR;
@@ -101,8 +84,6 @@ const struct dma_map_ops nommu_dma_ops = {
        .free                   = dma_generic_free_coherent,
        .map_sg                 = nommu_map_sg,
        .map_page               = nommu_map_page,
-       .sync_single_for_device = nommu_sync_single_for_device,
-       .sync_sg_for_device     = nommu_sync_sg_for_device,
        .is_phys                = 1,
        .mapping_error          = nommu_mapping_error,
        .dma_supported          = x86_dma_supported,
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 3d9b2308e7fad0be6f90cbf834d2e00d1f4fc417..03f3d7695daccae1af7265f2aab221da81573ce5 100644
@@ -577,7 +577,6 @@ do_general_protection(struct pt_regs *regs, long error_code)
 }
 NOKPROBE_SYMBOL(do_general_protection);
 
-/* May run on IST stack. */
 dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
 {
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -592,6 +591,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
        if (poke_int3_handler(regs))
                return;
 
+       /*
+        * Use ist_enter despite the fact that we don't use an IST stack.
+        * We can be called from a kprobe in non-CONTEXT_KERNEL kernel
+        * mode or even during context tracking state changes.
+        *
+        * This means that we can't schedule.  That's okay.
+        */
        ist_enter(regs);
        RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
@@ -609,15 +615,10 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
                        SIGTRAP) == NOTIFY_STOP)
                goto exit;
 
-       /*
-        * Let others (NMI) know that the debug stack is in use
-        * as we may switch to the interrupt stack.
-        */
-       debug_stack_usage_inc();
        cond_local_irq_enable(regs);
        do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
        cond_local_irq_disable(regs);
-       debug_stack_usage_dec();
+
 exit:
        ist_exit(regs);
 }
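
For context, a #BP raised from userspace still arrives as SIGTRAP; this series only changes which kernel stack do_int3() runs on. A small, x86-specific sketch showing that execution resumes cleanly after the trap, because the reported instruction pointer already lies past the one-byte int3:

    /*
     * bp_demo.c - illustrative only, x86-specific: take a #BP from
     * userspace. The kernel's do_int3() delivers SIGTRAP with the
     * instruction pointer already past the one-byte int3, so simply
     * returning from the handler resumes execution normally.
     */
    #include <signal.h>
    #include <unistd.h>

    static void on_trap(int sig)
    {
            static const char msg[] = "caught SIGTRAP from int3\n";

            (void)sig;
            write(STDOUT_FILENO, msg, sizeof(msg) - 1);  /* async-signal-safe */
    }

    int main(void)
    {
            static const char done[] = "resumed after the breakpoint\n";

            signal(SIGTRAP, on_trap);
            asm volatile("int3");    /* #BP -> do_int3() -> SIGTRAP */
            write(STDOUT_FILENO, done, sizeof(done) - 1);
            return 0;
    }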
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index c310a82843589c6df238e72061b037de42b22867..f9cfbc0d1f337e63b9a744cdb423ba2ecd6712f8 100644
@@ -227,7 +227,7 @@ int __init efi_alloc_page_tables(void)
        if (!pud) {
                if (CONFIG_PGTABLE_LEVELS > 4)
                        free_page((unsigned long) pgd_page_vaddr(*pgd));
-               free_page((unsigned long)efi_pgd);
+               free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
                return -ENOMEM;
        }
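
The one-line change restores the alloc/free pairing rule: efi_pgd is a PGD_ALLOCATION_ORDER allocation, so only free_pages() with the same order releases all of it; free_page() would leak the remainder. A hypothetical, build-only module sketch of the rule (names invented, order 1 standing in for PGD_ALLOCATION_ORDER):

    /*
     * order_demo.c - hypothetical build-only sketch (names invented).
     * Pages obtained with __get_free_pages(gfp, order) must be released
     * with free_pages(addr, order); free_page() only returns a single
     * order-0 page.
     */
    #include <linux/gfp.h>
    #include <linux/module.h>

    static unsigned long demo_pages;

    static int __init order_demo_init(void)
    {
            demo_pages = __get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
            if (!demo_pages)
                    return -ENOMEM;
            return 0;
    }

    static void __exit order_demo_exit(void)
    {
            free_pages(demo_pages, 1);  /* same order as the allocation */
    }

    module_init(order_demo_init);
    module_exit(order_demo_exit);
    MODULE_LICENSE("GPL");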
 
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index b7d73400ea29aa095acd58c5e8502a6524aff04f..f31e5d9031617b21df04416feef5aff3f85e9b59 100644
 
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_X86_PPRO_FENCE
-#define dma_rmb()      rmb()
-#else /* CONFIG_X86_PPRO_FENCE */
 #define dma_rmb()      barrier()
-#endif /* CONFIG_X86_PPRO_FENCE */
 #define dma_wmb()      barrier()
 
 #include <asm-generic/barrier.h>
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 1ae1c5a7392ead3c8b8a108f70bdddbf0d4f100e..6f22238f32173db4ef4d710666807cb5397f7dc0 100644
@@ -183,8 +183,10 @@ static void test_ptrace_syscall_restart(void)
                if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
                        err(1, "PTRACE_TRACEME");
 
+               pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
                printf("\tChild will make one syscall\n");
-               raise(SIGSTOP);
+               syscall(SYS_tgkill, pid, tid, SIGSTOP);
 
                syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
                _exit(0);
@@ -301,9 +303,11 @@ static void test_restart_under_ptrace(void)
                if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
                        err(1, "PTRACE_TRACEME");
 
+               pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
                printf("\tChild will take a nap until signaled\n");
                setsigign(SIGUSR1, SA_RESTART);
-               raise(SIGSTOP);
+               syscall(SYS_tgkill, pid, tid, SIGSTOP);
 
                syscall(SYS_pause, 0, 0, 0, 0, 0, 0);
                _exit(0);
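
Both hunks replace glibc's raise(), which in recent glibc versions issues more than one syscall around the kill, with a single direct tgkill, so the tracer observes exactly the register state the test expects. The pattern in isolation (Linux-specific sketch; resume the stopped process with "kill -CONT <pid>" from another shell):

    /*
     * tgkill_demo.c - Linux-specific sketch: stop exactly the current
     * thread with one tgkill syscall instead of glibc's raise().
     */
    #define _GNU_SOURCE
    #include <signal.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            pid_t pid = getpid();
            pid_t tid = syscall(SYS_gettid);  /* older glibc lacks a gettid() wrapper */

            printf("stopping tid %d of pid %d\n", (int)tid, (int)pid);
            fflush(stdout);

            /* One syscall, one well-defined register state for a tracer to see. */
            syscall(SYS_tgkill, pid, tid, SIGSTOP);

            printf("resumed\n");
            return 0;
    }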