Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 13 Sep 2017 18:56:16 +0000 (11:56 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 13 Sep 2017 18:56:16 +0000 (11:56 -0700)
Pull x86 fixes from Ingo Molnar:
 "The main changes are the PCID fixes from Andy, but there's also two
  hyperv fixes and two paravirt updates"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/hyper-v: Remove duplicated HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED definition
  x86/hyper-V: Allocate the IDT entry early in boot
  paravirt: Switch maintainer
  x86/paravirt: Remove no longer used paravirt functions
  x86/mm/64: Initialize CR4.PCIDE early
  x86/hibernate/64: Mask off CR3's PCID bits in the saved CR3
  x86/mm: Get rid of VM_BUG_ON in switch_tlb_irqs_off()

19 files changed:
MAINTAINERS
arch/x86/include/asm/desc.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/paravirt_types.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/special_insns.h
arch/x86/include/uapi/asm/hyperv.h
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/mshyperv.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/setup.c
arch/x86/kernel/smpboot.c
arch/x86/kvm/vmx.c
arch/x86/mm/init.c
arch/x86/mm/pgtable.c
arch/x86/mm/tlb.c
arch/x86/power/hibernate_64.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu_pv.c

index b61848afb9d2618ab5eb1be26a3d3c41eff6c451..209306019483c88ece7c990b9d3fe85f77837535 100644 (file)
@@ -10135,7 +10135,7 @@ F:      include/uapi/linux/ppdev.h
 F:     Documentation/parport*.txt
 
 PARAVIRT_OPS INTERFACE
-M:     Jeremy Fitzhardinge <jeremy@goop.org>
+M:     Juergen Gross <jgross@suse.com>
 M:     Chris Wright <chrisw@sous-sol.org>
 M:     Alok Kataria <akataria@vmware.com>
 M:     Rusty Russell <rusty@rustcorp.com.au>
@@ -10143,7 +10143,7 @@ L:      virtualization@lists.linux-foundation.org
 S:     Supported
 F:     Documentation/virtual/paravirt_ops.txt
 F:     arch/*/kernel/paravirt*
-F:     arch/*/include/asm/paravirt.h
+F:     arch/*/include/asm/paravirt*.h
 F:     include/linux/hypervisor.h
 
 PARIDE DRIVERS FOR PARALLEL PORT IDE DEVICES
index 1a2ba368da39ff4fbf24a922415490e1a2be68e6..9d0e13738ed3d3ea9cc9200c62153c101624a7b7 100644 (file)
@@ -121,7 +121,6 @@ static inline int desc_empty(const void *ptr)
 #define load_ldt(ldt)                          asm volatile("lldt %0"::"m" (ldt))
 
 #define store_gdt(dtr)                         native_store_gdt(dtr)
-#define store_idt(dtr)                         native_store_idt(dtr)
 #define store_tr(tr)                           (tr = native_store_tr())
 
 #define load_TLS(t, cpu)                       native_load_tls(t, cpu)
@@ -228,7 +227,7 @@ static inline void native_store_gdt(struct desc_ptr *dtr)
        asm volatile("sgdt %0":"=m" (*dtr));
 }
 
-static inline void native_store_idt(struct desc_ptr *dtr)
+static inline void store_idt(struct desc_ptr *dtr)
 {
        asm volatile("sidt %0":"=m" (*dtr));
 }
index c25dd22f7c70e97f293ecb086c1be572087ae0b3..12deec722cf0a14374d794b037eb26bf2e02ad51 100644 (file)
@@ -71,11 +71,6 @@ static inline void write_cr3(unsigned long x)
        PVOP_VCALL1(pv_mmu_ops.write_cr3, x);
 }
 
-static inline unsigned long __read_cr4(void)
-{
-       return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
-}
-
 static inline void __write_cr4(unsigned long x)
 {
        PVOP_VCALL1(pv_cpu_ops.write_cr4, x);
@@ -228,10 +223,6 @@ static inline void set_ldt(const void *addr, unsigned entries)
 {
        PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
 }
-static inline void store_idt(struct desc_ptr *dtr)
-{
-       PVOP_VCALL1(pv_cpu_ops.store_idt, dtr);
-}
 static inline unsigned long paravirt_store_tr(void)
 {
        return PVOP_CALL0(unsigned long, pv_cpu_ops.store_tr);
@@ -365,12 +356,6 @@ static inline void paravirt_release_p4d(unsigned long pfn)
        PVOP_VCALL1(pv_mmu_ops.release_p4d, pfn);
 }
 
-static inline void pte_update(struct mm_struct *mm, unsigned long addr,
-                             pte_t *ptep)
-{
-       PVOP_VCALL3(pv_mmu_ops.pte_update, mm, addr, ptep);
-}
-
 static inline pte_t __pte(pteval_t val)
 {
        pteval_t ret;
@@ -472,28 +457,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
 }
 
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-                             pmd_t *pmdp, pmd_t pmd)
-{
-       if (sizeof(pmdval_t) > sizeof(long))
-               /* 5 arg words */
-               pv_mmu_ops.set_pmd_at(mm, addr, pmdp, pmd);
-       else
-               PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
-                           native_pmd_val(pmd));
-}
-
-static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
-                             pud_t *pudp, pud_t pud)
-{
-       if (sizeof(pudval_t) > sizeof(long))
-               /* 5 arg words */
-               pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
-       else
-               PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
-                           native_pud_val(pud));
-}
-
 static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
        pmdval_t val = native_pmd_val(pmd);
index 6b64fc6367f2520858cce6e0c31d111836f1e509..42873edd9f9d20cda2d1cfcb92b3798c4a75b3aa 100644 (file)
@@ -107,7 +107,6 @@ struct pv_cpu_ops {
        unsigned long (*read_cr0)(void);
        void (*write_cr0)(unsigned long);
 
-       unsigned long (*read_cr4)(void);
        void (*write_cr4)(unsigned long);
 
 #ifdef CONFIG_X86_64
@@ -119,8 +118,6 @@ struct pv_cpu_ops {
        void (*load_tr_desc)(void);
        void (*load_gdt)(const struct desc_ptr *);
        void (*load_idt)(const struct desc_ptr *);
-       /* store_gdt has been removed. */
-       void (*store_idt)(struct desc_ptr *);
        void (*set_ldt)(const void *desc, unsigned entries);
        unsigned long (*store_tr)(void);
        void (*load_tls)(struct thread_struct *t, unsigned int cpu);
@@ -245,12 +242,6 @@ struct pv_mmu_ops {
        void (*set_pte_at)(struct mm_struct *mm, unsigned long addr,
                           pte_t *ptep, pte_t pteval);
        void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
-       void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
-                          pmd_t *pmdp, pmd_t pmdval);
-       void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
-                          pud_t *pudp, pud_t pudval);
-       void (*pte_update)(struct mm_struct *mm, unsigned long addr,
-                          pte_t *ptep);
 
        pte_t (*ptep_modify_prot_start)(struct mm_struct *mm, unsigned long addr,
                                        pte_t *ptep);
index 5b4c44d419c55cafbafbddc8562e5c4b8d074c29..b714934512b391905ed37400b6acb37ce6529ca2 100644 (file)
@@ -55,8 +55,6 @@ extern pmdval_t early_pmd_flags;
 #else  /* !CONFIG_PARAVIRT */
 #define set_pte(ptep, pte)             native_set_pte(ptep, pte)
 #define set_pte_at(mm, addr, ptep, pte)        native_set_pte_at(mm, addr, ptep, pte)
-#define set_pmd_at(mm, addr, pmdp, pmd)        native_set_pmd_at(mm, addr, pmdp, pmd)
-#define set_pud_at(mm, addr, pudp, pud)        native_set_pud_at(mm, addr, pudp, pud)
 
 #define set_pte_atomic(ptep, pte)                                      \
        native_set_pte_atomic(ptep, pte)
@@ -87,8 +85,6 @@ extern pmdval_t early_pmd_flags;
 #define pte_clear(mm, addr, ptep)      native_pte_clear(mm, addr, ptep)
 #define pmd_clear(pmd)                 native_pmd_clear(pmd)
 
-#define pte_update(mm, addr, ptep)              do { } while (0)
-
 #define pgd_val(x)     native_pgd_val(x)
 #define __pgd(x)       native_make_pgd(x)
 
@@ -979,31 +975,18 @@ static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
        native_set_pte(ptep, pte);
 }
 
-static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
-                                    pmd_t *pmdp , pmd_t pmd)
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+                             pmd_t *pmdp, pmd_t pmd)
 {
        native_set_pmd(pmdp, pmd);
 }
 
-static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
-                                    pud_t *pudp, pud_t pud)
+static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
+                             pud_t *pudp, pud_t pud)
 {
        native_set_pud(pudp, pud);
 }
 
-#ifndef CONFIG_PARAVIRT
-/*
- * Rules for using pte_update - it must be called after any PTE update which
- * has not been done using the set_pte / clear_pte interfaces.  It is used by
- * shadow mode hypervisors to resynchronize the shadow page tables.  Kernel PTE
- * updates should either be sets, clears, or set_pte_atomic for P->P
- * transitions, which means this hook should only be called for user PTEs.
- * This hook implies a P->P protection or access change has taken place, which
- * requires a subsequent TLB flush.
- */
-#define pte_update(mm, addr, ptep)             do { } while (0)
-#endif
-
 /*
  * We only update the dirty/accessed state if we set
  * the dirty bit by hand in the kernel, since the hardware
@@ -1031,7 +1014,6 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
                                       pte_t *ptep)
 {
        pte_t pte = native_ptep_get_and_clear(ptep);
-       pte_update(mm, addr, ptep);
        return pte;
 }
 
@@ -1058,7 +1040,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
                                      unsigned long addr, pte_t *ptep)
 {
        clear_bit(_PAGE_BIT_RW, (unsigned long *)&ptep->pte);
-       pte_update(mm, addr, ptep);
 }
 
 #define flush_tlb_fix_spurious_fault(vma, address) do { } while (0)
index 9efaabf5b54be04d722295dea1fd20a6e2515825..a24dfcf79f4acadd7b49f47c5138bc05cc50aa56 100644 (file)
@@ -135,6 +135,11 @@ static inline void native_wbinvd(void)
 
 extern asmlinkage void native_load_gs_index(unsigned);
 
+static inline unsigned long __read_cr4(void)
+{
+       return native_read_cr4();
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -173,11 +178,6 @@ static inline void write_cr3(unsigned long x)
        native_write_cr3(x);
 }
 
-static inline unsigned long __read_cr4(void)
-{
-       return native_read_cr4();
-}
-
 static inline void __write_cr4(unsigned long x)
 {
        native_write_cr4(x);
index 7032f4d8dff30be9557eedb26f6dcb779b26d3eb..f65d12504e8051f2e47aae2a0076a883c1ce535e 100644 (file)
 /* Recommend using the newer ExProcessorMasks interface */
 #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED  (1 << 11)
 
-/*
- * HV_VP_SET available
- */
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED  (1 << 11)
-
-
 /*
  * Crash notification flag.
  */
index fb1d3358a4af5bde82e5d8dd751b2db3271a2c82..775f10100d7febac27a84bb9c8deab24e119a8c8 100644 (file)
@@ -169,21 +169,21 @@ static int __init x86_mpx_setup(char *s)
 __setup("nompx", x86_mpx_setup);
 
 #ifdef CONFIG_X86_64
-static int __init x86_pcid_setup(char *s)
+static int __init x86_nopcid_setup(char *s)
 {
-       /* require an exact match without trailing characters */
-       if (strlen(s))
-               return 0;
+       /* nopcid doesn't accept parameters */
+       if (s)
+               return -EINVAL;
 
        /* do not emit a message if the feature is not present */
        if (!boot_cpu_has(X86_FEATURE_PCID))
-               return 1;
+               return 0;
 
        setup_clear_cpu_cap(X86_FEATURE_PCID);
        pr_info("nopcid: PCID feature disabled\n");
-       return 1;
+       return 0;
 }
-__setup("nopcid", x86_pcid_setup);
+early_param("nopcid", x86_nopcid_setup);
 #endif
 
 static int __init x86_noinvpcid_setup(char *s)
@@ -329,38 +329,6 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c)
        }
 }
 
-static void setup_pcid(struct cpuinfo_x86 *c)
-{
-       if (cpu_has(c, X86_FEATURE_PCID)) {
-               if (cpu_has(c, X86_FEATURE_PGE)) {
-                       /*
-                        * We'd like to use cr4_set_bits_and_update_boot(),
-                        * but we can't.  CR4.PCIDE is special and can only
-                        * be set in long mode, and the early CPU init code
-                        * doesn't know this and would try to restore CR4.PCIDE
-                        * prior to entering long mode.
-                        *
-                        * Instead, we rely on the fact that hotplug, resume,
-                        * etc all fully restore CR4 before they write anything
-                        * that could have nonzero PCID bits to CR3.  CR4.PCIDE
-                        * has no effect on the page tables themselves, so we
-                        * don't need it to be restored early.
-                        */
-                       cr4_set_bits(X86_CR4_PCIDE);
-               } else {
-                       /*
-                        * flush_tlb_all(), as currently implemented, won't
-                        * work if PCID is on but PGE is not.  Since that
-                        * combination doesn't exist on real hardware, there's
-                        * no reason to try to fully support it, but it's
-                        * polite to avoid corrupting data if we're on
-                        * an improperly configured VM.
-                        */
-                       clear_cpu_cap(c, X86_FEATURE_PCID);
-               }
-       }
-}
-
 /*
  * Protection Keys are not available in 32-bit mode.
  */
@@ -1175,9 +1143,6 @@ static void identify_cpu(struct cpuinfo_x86 *c)
        setup_smep(c);
        setup_smap(c);
 
-       /* Set up PCID */
-       setup_pcid(c);
-
        /*
         * The vendor-specific functions might have changed features.
         * Now we do "generic changes."
index 3b3f713e15e5fea418ebc8eae3bc7a948570dd73..236324e83a3ae0755c4de2a087983759793c0add 100644 (file)
@@ -59,8 +59,6 @@ void hyperv_vector_handler(struct pt_regs *regs)
 void hv_setup_vmbus_irq(void (*handler)(void))
 {
        vmbus_handler = handler;
-       /* Setup the IDT for hypervisor callback */
-       alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
 }
 
 void hv_remove_vmbus_irq(void)
@@ -251,6 +249,8 @@ static void __init ms_hyperv_init_platform(void)
         */
        x86_platform.apic_post_init = hyperv_init;
        hyperv_setup_mmu_ops();
+       /* Setup the IDT for hypervisor callback */
+       alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
 #endif
 }
 
index a14df9eecfed16598e131c41c3df4967a18f2871..19a3e8f961c772af6572f80a860e1c892ad31a32 100644 (file)
@@ -327,7 +327,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
        .set_debugreg = native_set_debugreg,
        .read_cr0 = native_read_cr0,
        .write_cr0 = native_write_cr0,
-       .read_cr4 = native_read_cr4,
        .write_cr4 = native_write_cr4,
 #ifdef CONFIG_X86_64
        .read_cr8 = native_read_cr8,
@@ -343,7 +342,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
        .set_ldt = native_set_ldt,
        .load_gdt = native_load_gdt,
        .load_idt = native_load_idt,
-       .store_idt = native_store_idt,
        .store_tr = native_store_tr,
        .load_tls = native_load_tls,
 #ifdef CONFIG_X86_64
@@ -411,8 +409,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
        .set_pte = native_set_pte,
        .set_pte_at = native_set_pte_at,
        .set_pmd = native_set_pmd,
-       .set_pmd_at = native_set_pmd_at,
-       .pte_update = paravirt_nop,
 
        .ptep_modify_prot_start = __ptep_modify_prot_start,
        .ptep_modify_prot_commit = __ptep_modify_prot_commit,
@@ -424,7 +420,6 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
        .pmd_clear = native_pmd_clear,
 #endif
        .set_pud = native_set_pud,
-       .set_pud_at = native_set_pud_at,
 
        .pmd_val = PTE_IDENT,
        .make_pmd = PTE_IDENT,
index d84afb0a322dd8515fb91424eef6cc42fcbfaee2..0957dd73d127554803f35d5be45dcddb845ca741 100644 (file)
@@ -1178,8 +1178,11 @@ void __init setup_arch(char **cmdline_p)
         * with the current CR4 value.  This may not be necessary, but
         * auditing all the early-boot CR4 manipulation would be needed to
         * rule it out.
+        *
+        * Mask off features that don't work outside long mode (just
+        * PCIDE for now).
         */
-       mmu_cr4_features = __read_cr4();
+       mmu_cr4_features = __read_cr4() & ~X86_CR4_PCIDE;
 
        memblock_set_current_limit(get_max_mapped());
 
index cd6622c3204e6b4c5c159a659b0b23f4728da34a..0854ff1692745adf4831e2deea0cb203977847d9 100644 (file)
@@ -226,10 +226,12 @@ static int enable_start_cpu0;
 static void notrace start_secondary(void *unused)
 {
        /*
-        * Don't put *anything* before cpu_init(), SMP booting is too
-        * fragile that we want to limit the things done here to the
-        * most necessary things.
+        * Don't put *anything* except direct CPU state initialization
+        * before cpu_init(), SMP booting is too fragile that we want to
+        * limit the things done here to the most necessary things.
         */
+       if (boot_cpu_has(X86_FEATURE_PCID))
+               __write_cr4(__read_cr4() | X86_CR4_PCIDE);
        cpu_init();
        x86_cpuinit.early_percpu_clock_init();
        preempt_disable();
index 4253adef9044c10429094495e01074da46b0d864..699704d4bc9e7716e04da7298ec7f4491b436af1 100644 (file)
@@ -5192,7 +5192,7 @@ static void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
        vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS);  /* 22.2.4 */
        vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8);  /* 22.2.4 */
 
-       native_store_idt(&dt);
+       store_idt(&dt);
        vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
        vmx->host_idt_base = dt.address;
 
index 7777ccc0e9f979dc76cc9d520885eea02114223e..af5c1ed21d43ac651ecbe02e7a58dc7baa884168 100644 (file)
@@ -19,6 +19,7 @@
 #include <asm/microcode.h>
 #include <asm/kaslr.h>
 #include <asm/hypervisor.h>
+#include <asm/cpufeature.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -193,6 +194,38 @@ static void __init probe_page_size_mask(void)
        }
 }
 
+static void setup_pcid(void)
+{
+#ifdef CONFIG_X86_64
+       if (boot_cpu_has(X86_FEATURE_PCID)) {
+               if (boot_cpu_has(X86_FEATURE_PGE)) {
+                       /*
+                        * This can't be cr4_set_bits_and_update_boot() --
+                        * the trampoline code can't handle CR4.PCIDE and
+                        * it wouldn't do any good anyway.  Despite the name,
+                        * cr4_set_bits_and_update_boot() doesn't actually
+                        * cause the bits in question to remain set all the
+                        * way through the secondary boot asm.
+                        *
+                        * Instead, we brute-force it and set CR4.PCIDE
+                        * manually in start_secondary().
+                        */
+                       cr4_set_bits(X86_CR4_PCIDE);
+               } else {
+                       /*
+                        * flush_tlb_all(), as currently implemented, won't
+                        * work if PCID is on but PGE is not.  Since that
+                        * combination doesn't exist on real hardware, there's
+                        * no reason to try to fully support it, but it's
+                        * polite to avoid corrupting data if we're on
+                        * an improperly configured VM.
+                        */
+                       setup_clear_cpu_cap(X86_FEATURE_PCID);
+               }
+       }
+#endif
+}
+
 #ifdef CONFIG_X86_32
 #define NR_RANGE_MR 3
 #else /* CONFIG_X86_64 */
@@ -592,6 +625,7 @@ void __init init_mem_mapping(void)
        unsigned long end;
 
        probe_page_size_mask();
+       setup_pcid();
 
 #ifdef CONFIG_X86_64
        end = max_pfn << PAGE_SHIFT;
index 218834a3e9adde25ea3ddf3ef2b3f513846ce1c7..b372f3442bbf3b5cff5782001e5c79b46d49edc6 100644 (file)
@@ -426,10 +426,8 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 {
        int changed = !pte_same(*ptep, entry);
 
-       if (changed && dirty) {
+       if (changed && dirty)
                *ptep = entry;
-               pte_update(vma->vm_mm, address, ptep);
-       }
 
        return changed;
 }
@@ -486,9 +484,6 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
                ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
                                         (unsigned long *) &ptep->pte);
 
-       if (ret)
-               pte_update(vma->vm_mm, addr, ptep);
-
        return ret;
 }
 
index 37689a7cc03b493927f50b2d14be5f72b3981a02..1ab3821f9e2629df571544077d63be950361bc20 100644 (file)
@@ -121,8 +121,28 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
         * hypothetical buggy code that directly switches to swapper_pg_dir
         * without going through leave_mm() / switch_mm_irqs_off() or that
         * does something like write_cr3(read_cr3_pa()).
+        *
+        * Only do this check if CONFIG_DEBUG_VM=y because __read_cr3()
+        * isn't free.
         */
-       VM_BUG_ON(__read_cr3() != (__sme_pa(real_prev->pgd) | prev_asid));
+#ifdef CONFIG_DEBUG_VM
+       if (WARN_ON_ONCE(__read_cr3() !=
+                        (__sme_pa(real_prev->pgd) | prev_asid))) {
+               /*
+                * If we were to BUG here, we'd be very likely to kill
+                * the system so hard that we don't see the call trace.
+                * Try to recover instead by ignoring the error and doing
+                * a global flush to minimize the chance of corruption.
+                *
+                * (This is far from being a fully correct recovery.
+                *  Architecturally, the CPU could prefetch something
+                *  back into an incorrect ASID slot and leave it there
+                *  to cause trouble down the road.  It's better than
+                *  nothing, though.)
+                */
+               __flush_tlb_all();
+       }
+#endif
 
        if (real_prev == next) {
                VM_BUG_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
index f2598d81cd55067ffd695a18bde0352a5d98264b..f910c514438f168a5f09afbd312311498abe2c50 100644 (file)
@@ -295,7 +295,26 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
                return -EOVERFLOW;
        rdr->jump_address = (unsigned long)restore_registers;
        rdr->jump_address_phys = __pa_symbol(restore_registers);
-       rdr->cr3 = restore_cr3;
+
+       /*
+        * The restore code fixes up CR3 and CR4 in the following sequence:
+        *
+        * [in hibernation asm]
+        * 1. CR3 <= temporary page tables
+        * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
+        * 3. CR3 <= rdr->cr3
+        * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
+        * [in restore_processor_state()]
+        * 5. CR4 <= saved CR4
+        * 6. CR3 <= saved CR3
+        *
+        * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
+        * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
+        * rdr->cr3 needs to point to valid page tables but must not
+        * have any of the PCID bits set.
+        */
+       rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
+
        rdr->magic = RESTORE_MAGIC;
 
        hibernation_e820_save(rdr->e820_digest);
index ae2a2e2d636286f36e570dde5b4cb9d29dc55f5b..69b9deff7e5c84c4133064ada0606eb872908152 100644 (file)
@@ -1038,7 +1038,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
        .read_cr0 = xen_read_cr0,
        .write_cr0 = xen_write_cr0,
 
-       .read_cr4 = native_read_cr4,
        .write_cr4 = xen_write_cr4,
 
 #ifdef CONFIG_X86_64
@@ -1073,7 +1072,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
        .alloc_ldt = xen_alloc_ldt,
        .free_ldt = xen_free_ldt,
 
-       .store_idt = native_store_idt,
        .store_tr = xen_store_tr,
 
        .write_ldt_entry = xen_write_ldt_entry,
index 6b983b300666bd178c5b898f35e6c8c134d2a800..509f560bd0c6d4731cac96fc64296184e6818b9c 100644 (file)
@@ -2409,8 +2409,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
        .flush_tlb_single = xen_flush_tlb_single,
        .flush_tlb_others = xen_flush_tlb_others,
 
-       .pte_update = paravirt_nop,
-
        .pgd_alloc = xen_pgd_alloc,
        .pgd_free = xen_pgd_free,