Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author: Linus Torvalds <torvalds@linux-foundation.org>
Sat, 8 Sep 2018 22:52:45 +0000 (15:52 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Sat, 8 Sep 2018 22:52:45 +0000 (15:52 -0700)
Pull KVM fixes from Radim Krčmář:
 "ARM:
   - Fix a VFP corruption in 32-bit guest
   - Add missing cache invalidation for CoW pages
   - Two small cleanups

  s390:
   - Fallout from the hugetlbfs support: pfmf interpretation and locking
   - VSIE: fix keywrapping for nested guests

  PPC:
   - Fix a bug where pages might not get marked dirty, causing guest
     memory corruption on migration
   - Fix a bug causing reads from guest memory to use the wrong guest
     real address for very large HPT guests (>256G of memory), leading
     to failures in instruction emulation.

  x86:
   - Fix out-of-bounds access from malicious pv ipi hypercalls
     (introduced in rc1)
   - Fix delivery of pending interrupts when entering a nested guest,
     preventing arbitrarily late injection
   - Sanitize kvm_stat output after destroying a guest
   - Fix infinite loop when emulating a nested guest page fault and
     improve the surrounding emulation code
   - Two minor cleanups"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  KVM: LAPIC: Fix pv ipis out-of-bounds access
  KVM: nVMX: Fix loss of pending IRQ/NMI before entering L2
  arm64: KVM: Remove pgd_lock
  KVM: Remove obsolete kvm_unmap_hva notifier backend
  arm64: KVM: Only force FPEXC32_EL2.EN if trapping FPSIMD
  KVM: arm/arm64: Clean dcache to PoC when changing PTE due to CoW
  KVM: s390: Properly lock mm context allow_gmap_hpage_1m setting
  KVM: s390: vsie: copy wrapping keys to right place
  KVM: s390: Fix pfmf and conditional skey emulation
  tools/kvm_stat: re-animate display of dead guests
  tools/kvm_stat: indicate dead guests as such
  tools/kvm_stat: handle guest removals more gracefully
  tools/kvm_stat: don't reset stats when setting PID filter for debugfs
  tools/kvm_stat: fix updates for dead guests
  tools/kvm_stat: fix handling of invalid paths in debugfs provider
  tools/kvm_stat: fix python3 issues
  KVM: x86: Unexport x86_emulate_instruction()
  KVM: x86: Rename emulate_instruction() to kvm_emulate_instruction()
  KVM: x86: Do not re-{try,execute} after failed emulation in L2
  KVM: x86: Default to not allowing emulation retry in kvm_mmu_page_fault
  ...

21 files changed:
arch/arm/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/kvm/hyp/switch.c
arch/mips/include/asm/kvm_host.h
arch/mips/kvm/mmu.c
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/s390/include/asm/mmu.h
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/priv.c
arch/s390/kvm/vsie.c
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
tools/kvm/kvm_stat/kvm_stat
virt/kvm/arm/mmu.c
virt/kvm/arm/trace.h

index 79906cecb091e4fbf8dafb850d9d4b7ea19c3c41..3ad482d2f1eb91c8bfe6b597788e1e70b9521234 100644 (file)
@@ -223,7 +223,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
                              struct kvm_vcpu_events *events);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
                        unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
index f26055f2306e1f9a479417507c2edf428c9f99de..3d6d7336f871221fd29bcc3bc4faa2cee0a7765f 100644 (file)
@@ -61,8 +61,7 @@ struct kvm_arch {
        u64    vmid_gen;
        u32    vmid;
 
-       /* 1-level 2nd stage table and lock */
-       spinlock_t pgd_lock;
+       /* 1-level 2nd stage table, protected by kvm->mmu_lock */
        pgd_t *pgd;
 
        /* VTTBR value associated with above pgd and vmid */
@@ -357,7 +356,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
                              struct kvm_vcpu_events *events);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
                        unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
index d496ef579859627edd1ba98c1233d9584cd407e3..ca46153d79154bae1b0833231245129752484362 100644 (file)
@@ -98,8 +98,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
        val = read_sysreg(cpacr_el1);
        val |= CPACR_EL1_TTA;
        val &= ~CPACR_EL1_ZEN;
-       if (!update_fp_enabled(vcpu))
+       if (!update_fp_enabled(vcpu)) {
                val &= ~CPACR_EL1_FPEN;
+               __activate_traps_fpsimd32(vcpu);
+       }
 
        write_sysreg(val, cpacr_el1);
 
@@ -114,8 +116,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
 
        val = CPTR_EL2_DEFAULT;
        val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
-       if (!update_fp_enabled(vcpu))
+       if (!update_fp_enabled(vcpu)) {
                val |= CPTR_EL2_TFP;
+               __activate_traps_fpsimd32(vcpu);
+       }
 
        write_sysreg(val, cptr_el2);
 }
@@ -129,7 +133,6 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
        if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
                write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
 
-       __activate_traps_fpsimd32(vcpu);
        if (has_vhe())
                activate_traps_vhe(vcpu);
        else
index a9af1d2dcd699114d00a55689c29137cef384841..2c1c53d12179302140d3576dddd11a732a5b13d9 100644 (file)
@@ -931,7 +931,6 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
                                                   bool write);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
                        unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
index ee64db03279336db79ac5c98e7634074d47608ac..d8dcdb350405900928b83e7afa2112ecf3122518 100644 (file)
@@ -512,16 +512,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
        return 1;
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-       unsigned long end = hva + PAGE_SIZE;
-
-       handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
-
-       kvm_mips_callbacks->flush_shadow_all(kvm);
-       return 0;
-}
-
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
        handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
index 3c0e8fb2b773ebaf3bdfe404b2909d2d5ed48b4a..68e14afecac85b1d1fd0eff661f6a3aabe0217a6 100644 (file)
@@ -358,7 +358,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
        unsigned long pp, key;
        unsigned long v, orig_v, gr;
        __be64 *hptep;
-       int index;
+       long int index;
        int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
 
        if (kvm_is_radix(vcpu->kvm))
index 0af1c0aea1fe659fca4723e17a02cc17eb8fa08f..fd6e8c13685f4c0223749647ad04c34a71ad4589 100644 (file)
@@ -725,10 +725,10 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
                                              gpa, shift);
                kvmppc_radix_tlbie_page(kvm, gpa, shift);
                if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
-                       unsigned long npages = 1;
+                       unsigned long psize = PAGE_SIZE;
                        if (shift)
-                               npages = 1ul << (shift - PAGE_SHIFT);
-                       kvmppc_update_dirty_map(memslot, gfn, npages);
+                               psize = 1ul << shift;
+                       kvmppc_update_dirty_map(memslot, gfn, psize);
                }
        }
        return 0;                               
index f31a15044c24a56875661aa6d3e195d75bd9ef9c..a8418e1379eb7ee08c92acd034eae000cb19c695 100644 (file)
@@ -16,7 +16,13 @@ typedef struct {
        unsigned long asce;
        unsigned long asce_limit;
        unsigned long vdso_base;
-       /* The mmu context allocates 4K page tables. */
+       /*
+        * The following bitfields need a down_write on the mm
+        * semaphore when they are written to. As they are only
+        * written once, they can be read without a lock.
+        *
+        * The mmu context allocates 4K page tables.
+        */
        unsigned int alloc_pgste:1;
        /* The mmu context uses extended page tables. */
        unsigned int has_pgste:1;
index 91ad4a9425c0b74c024f07fe8347651c91f4bfb7..f69333fd2fa3818c5eeb8bff9240f67e29f37ef0 100644 (file)
@@ -695,7 +695,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
                        r = -EINVAL;
                else {
                        r = 0;
+                       down_write(&kvm->mm->mmap_sem);
                        kvm->mm->context.allow_gmap_hpage_1m = 1;
+                       up_write(&kvm->mm->mmap_sem);
                        /*
                         * We might have to create fake 4k page
                         * tables. To avoid that the hardware works on
index d68f10441a164f2c22236adaa143edf6b1d0f7d0..8679bd74d337a583a3dde940d0cef1f427373a4b 100644 (file)
@@ -280,9 +280,11 @@ retry:
                        goto retry;
                }
        }
-       if (rc)
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
        up_read(&current->mm->mmap_sem);
+       if (rc == -EFAULT)
+               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       if (rc < 0)
+               return rc;
        vcpu->run->s.regs.gprs[reg1] &= ~0xff;
        vcpu->run->s.regs.gprs[reg1] |= key;
        return 0;
@@ -324,9 +326,11 @@ retry:
                        goto retry;
                }
        }
-       if (rc < 0)
-               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
        up_read(&current->mm->mmap_sem);
+       if (rc == -EFAULT)
+               return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+       if (rc < 0)
+               return rc;
        kvm_s390_set_psw_cc(vcpu, rc);
        return 0;
 }
@@ -390,12 +394,12 @@ static int handle_sske(struct kvm_vcpu *vcpu)
                                              FAULT_FLAG_WRITE, &unlocked);
                        rc = !rc ? -EAGAIN : rc;
                }
+               up_read(&current->mm->mmap_sem);
                if (rc == -EFAULT)
                        return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
-               up_read(&current->mm->mmap_sem);
-               if (rc >= 0)
-                       start += PAGE_SIZE;
+               if (rc < 0)
+                       return rc;
+               start += PAGE_SIZE;
        }
 
        if (m3 & (SSKE_MC | SSKE_MR)) {
@@ -1002,13 +1006,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
                                                      FAULT_FLAG_WRITE, &unlocked);
                                rc = !rc ? -EAGAIN : rc;
                        }
+                       up_read(&current->mm->mmap_sem);
                        if (rc == -EFAULT)
                                return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
-                       up_read(&current->mm->mmap_sem);
-                       if (rc >= 0)
-                               start += PAGE_SIZE;
+                       if (rc == -EAGAIN)
+                               continue;
+                       if (rc < 0)
+                               return rc;
                }
+               start += PAGE_SIZE;
        }
        if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
                if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {
index 63844b95c22c9902df769928313b988ee7d7667b..a2b28cd1e3fedb2bdcc6dbb1cff530ba0e3371a7 100644 (file)
@@ -173,7 +173,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
                return set_validity_icpt(scb_s, 0x0039U);
 
        /* copy only the wrapping keys */
-       if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56))
+       if (read_guest_real(vcpu, crycb_addr + 72,
+                           vsie_page->crycb.dea_wrapping_key_mask, 56))
                return set_validity_icpt(scb_s, 0x0035U);
 
        scb_s->ecb3 |= ecb3_flags;
index 00ddb0c9e612a6e084298ef0f7372b924b76ba12..8e90488c3d56895f62080666e2141df1731092f3 100644 (file)
@@ -1237,19 +1237,12 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE         (1 << 0)
 #define EMULTYPE_TRAP_UD           (1 << 1)
 #define EMULTYPE_SKIP              (1 << 2)
-#define EMULTYPE_RETRY             (1 << 3)
-#define EMULTYPE_NO_REEXECUTE      (1 << 4)
-#define EMULTYPE_NO_UD_ON_FAIL     (1 << 5)
-#define EMULTYPE_VMWARE                    (1 << 6)
-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
-                           int emulation_type, void *insn, int insn_len);
-
-static inline int emulate_instruction(struct kvm_vcpu *vcpu,
-                       int emulation_type)
-{
-       return x86_emulate_instruction(vcpu, 0,
-                       emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
-}
+#define EMULTYPE_ALLOW_RETRY       (1 << 3)
+#define EMULTYPE_NO_UD_ON_FAIL     (1 << 4)
+#define EMULTYPE_VMWARE                    (1 << 5)
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+                                       void *insn, int insn_len);
 
 void kvm_enable_efer_bits(u64);
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
@@ -1450,7 +1443,6 @@ asmlinkage void kvm_spurious_fault(void);
        ____kvm_handle_fault_on_reboot(insn, "")
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
@@ -1463,7 +1455,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
 
 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
-                   unsigned long ipi_bitmap_high, int min,
+                   unsigned long ipi_bitmap_high, u32 min,
                    unsigned long icr, int op_64_bit);
 
 u64 kvm_get_arch_capabilities(void);
index 0cefba28c864a3a0925378ed9b438a05b554cff2..17c0472c5b344faaaac3153ff53c4600a3fcd81e 100644 (file)
@@ -548,7 +548,7 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 }
 
 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
-                   unsigned long ipi_bitmap_high, int min,
+                   unsigned long ipi_bitmap_high, u32 min,
                    unsigned long icr, int op_64_bit)
 {
        int i;
@@ -571,18 +571,31 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);
 
+       if (min > map->max_apic_id)
+               goto out;
        /* Bits above cluster_size are masked in the caller.  */
-       for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
-               vcpu = map->phys_map[min + i]->vcpu;
-               count += kvm_apic_set_irq(vcpu, &irq, NULL);
+       for_each_set_bit(i, &ipi_bitmap_low,
+               min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+               if (map->phys_map[min + i]) {
+                       vcpu = map->phys_map[min + i]->vcpu;
+                       count += kvm_apic_set_irq(vcpu, &irq, NULL);
+               }
        }
 
        min += cluster_size;
-       for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
-               vcpu = map->phys_map[min + i]->vcpu;
-               count += kvm_apic_set_irq(vcpu, &irq, NULL);
+
+       if (min > map->max_apic_id)
+               goto out;
+
+       for_each_set_bit(i, &ipi_bitmap_high,
+               min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+               if (map->phys_map[min + i]) {
+                       vcpu = map->phys_map[min + i]->vcpu;
+                       count += kvm_apic_set_irq(vcpu, &irq, NULL);
+               }
        }
 
+out:
        rcu_read_unlock();
        return count;
 }
index a282321329b51a10e7563b0d422bd9e38d57b530..e24ea7067373af69d258c46995007b0446a69fdc 100644 (file)
@@ -1853,11 +1853,6 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
        return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-       return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
-}
-
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
        return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
@@ -5217,7 +5212,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
                       void *insn, int insn_len)
 {
-       int r, emulation_type = EMULTYPE_RETRY;
+       int r, emulation_type = 0;
        enum emulation_result er;
        bool direct = vcpu->arch.mmu.direct_map;
 
@@ -5230,10 +5225,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
        r = RET_PF_INVALID;
        if (unlikely(error_code & PFERR_RSVD_MASK)) {
                r = handle_mmio_page_fault(vcpu, cr2, direct);
-               if (r == RET_PF_EMULATE) {
-                       emulation_type = 0;
+               if (r == RET_PF_EMULATE)
                        goto emulate;
-               }
        }
 
        if (r == RET_PF_INVALID) {
@@ -5260,8 +5253,19 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
                return 1;
        }
 
-       if (mmio_info_in_cache(vcpu, cr2, direct))
-               emulation_type = 0;
+       /*
+        * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
+        * optimistically try to just unprotect the page and let the processor
+        * re-execute the instruction that caused the page fault.  Do not allow
+        * retrying MMIO emulation, as it's not only pointless but could also
+        * cause us to enter an infinite loop because the processor will keep
+        * faulting on the non-existent MMIO address.  Retrying an instruction
+        * from a nested guest is also pointless and dangerous as we are only
+        * explicitly shadowing L1's page tables, i.e. unprotecting something
+        * for L1 isn't going to magically fix whatever issue cause L2 to fail.
+        */
+       if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
+               emulation_type = EMULTYPE_ALLOW_RETRY;
 emulate:
        /*
         * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
index 6276140044d0848b58e6778c31cf4d247520ac44..89c4c5aa15f16c71af5404f302a627313a61fd96 100644 (file)
@@ -776,7 +776,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        }
 
        if (!svm->next_rip) {
-               if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
+               if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
                                EMULATE_DONE)
                        printk(KERN_DEBUG "%s: NOP\n", __func__);
                return;
@@ -2715,7 +2715,7 @@ static int gp_interception(struct vcpu_svm *svm)
 
        WARN_ON_ONCE(!enable_vmware_backdoor);
 
-       er = emulate_instruction(vcpu,
+       er = kvm_emulate_instruction(vcpu,
                EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
        if (er == EMULATE_USER_EXIT)
                return 0;
@@ -2819,7 +2819,7 @@ static int io_interception(struct vcpu_svm *svm)
        string = (io_info & SVM_IOIO_STR_MASK) != 0;
        in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
        if (string)
-               return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+               return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
        port = io_info >> 16;
        size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@ -3861,7 +3861,7 @@ static int iret_interception(struct vcpu_svm *svm)
 static int invlpg_interception(struct vcpu_svm *svm)
 {
        if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
-               return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+               return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 
        kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
        return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3869,13 +3869,13 @@ static int invlpg_interception(struct vcpu_svm *svm)
 
 static int emulate_on_interception(struct vcpu_svm *svm)
 {
-       return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 }
 
 static int rsm_interception(struct vcpu_svm *svm)
 {
-       return x86_emulate_instruction(&svm->vcpu, 0, 0,
-                                      rsm_ins_bytes, 2) == EMULATE_DONE;
+       return kvm_emulate_instruction_from_buffer(&svm->vcpu,
+                                       rsm_ins_bytes, 2) == EMULATE_DONE;
 }
 
 static int rdpmc_interception(struct vcpu_svm *svm)
@@ -4700,7 +4700,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
                ret = avic_unaccel_trap_write(svm);
        } else {
                /* Handling Fault */
-               ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+               ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
        }
 
        return ret;
@@ -6747,7 +6747,7 @@ e_free:
 static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
 {
        unsigned long vaddr, vaddr_end, next_vaddr;
-       unsigned long dst_vaddr, dst_vaddr_end;
+       unsigned long dst_vaddr;
        struct page **src_p, **dst_p;
        struct kvm_sev_dbg debug;
        unsigned long n;
@@ -6763,7 +6763,6 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
        size = debug.len;
        vaddr_end = vaddr + size;
        dst_vaddr = debug.dst_uaddr;
-       dst_vaddr_end = dst_vaddr + size;
 
        for (; vaddr < vaddr_end; vaddr = next_vaddr) {
                int len, s_off, d_off;
index 1d26f3c4985ba6dd5fc88d72959cd49b084606fb..533a327372c876df0b1c2b99ea3558e2aaa92df4 100644 (file)
@@ -6983,7 +6983,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
         * Cause the #SS fault with 0 error code in VM86 mode.
         */
        if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
-               if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+               if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
                        if (vcpu->arch.halt_request) {
                                vcpu->arch.halt_request = 0;
                                return kvm_vcpu_halt(vcpu);
@@ -7054,7 +7054,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 
        if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
                WARN_ON_ONCE(!enable_vmware_backdoor);
-               er = emulate_instruction(vcpu,
+               er = kvm_emulate_instruction(vcpu,
                        EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
                if (er == EMULATE_USER_EXIT)
                        return 0;
@@ -7157,7 +7157,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
        ++vcpu->stat.io_exits;
 
        if (string)
-               return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+               return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
        port = exit_qualification >> 16;
        size = (exit_qualification & 7) + 1;
@@ -7231,7 +7231,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 static int handle_desc(struct kvm_vcpu *vcpu)
 {
        WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
-       return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_cr(struct kvm_vcpu *vcpu)
@@ -7480,7 +7480,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
 
 static int handle_invd(struct kvm_vcpu *vcpu)
 {
-       return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_invlpg(struct kvm_vcpu *vcpu)
@@ -7547,7 +7547,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
                        return kvm_skip_emulated_instruction(vcpu);
                }
        }
-       return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+       return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
@@ -7704,8 +7704,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
                if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
                        return kvm_skip_emulated_instruction(vcpu);
                else
-                       return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
-                                                      NULL, 0) == EMULATE_DONE;
+                       return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
+                                                               EMULATE_DONE;
        }
 
        return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -7748,7 +7748,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
                if (kvm_test_request(KVM_REQ_EVENT, vcpu))
                        return 1;
 
-               err = emulate_instruction(vcpu, 0);
+               err = kvm_emulate_instruction(vcpu, 0);
 
                if (err == EMULATE_USER_EXIT) {
                        ++vcpu->stat.mmio_exits;
@@ -12537,8 +12537,11 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
        struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
        bool from_vmentry = !!exit_qual;
        u32 dummy_exit_qual;
+       u32 vmcs01_cpu_exec_ctrl;
        int r = 0;
 
+       vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+
        enter_guest_mode(vcpu);
 
        if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -12574,6 +12577,25 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
                kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
        }
 
+       /*
+        * If L1 had a pending IRQ/NMI until it executed
+        * VMLAUNCH/VMRESUME which wasn't delivered because it was
+        * disallowed (e.g. interrupts disabled), L0 needs to
+        * evaluate if this pending event should cause an exit from L2
+        * to L1 or delivered directly to L2 (e.g. In case L1 don't
+        * intercept EXTERNAL_INTERRUPT).
+        *
+        * Usually this would be handled by L0 requesting a
+        * IRQ/NMI window by setting VMCS accordingly. However,
+        * this setting was done on VMCS01 and now VMCS02 is active
+        * instead. Thus, we force L0 to perform pending event
+        * evaluation by requesting a KVM_REQ_EVENT.
+        */
+       if (vmcs01_cpu_exec_ctrl &
+               (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
+               kvm_make_request(KVM_REQ_EVENT, vcpu);
+       }
+
        /*
         * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
         * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -13988,9 +14010,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
            check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
                return -EINVAL;
 
-       if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
-               vmx->nested.nested_run_pending = 1;
-
        vmx->nested.dirty_vmcs12 = true;
        ret = enter_vmx_non_root_mode(vcpu, NULL);
        if (ret)
index 506bd2b4b8bb76e21a959310d33f6de6180719f8..542f6315444d75aa365ca04dba4bddd7bfb369d3 100644 (file)
@@ -4987,7 +4987,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
                emul_type = 0;
        }
 
-       er = emulate_instruction(vcpu, emul_type);
+       er = kvm_emulate_instruction(vcpu, emul_type);
        if (er == EMULATE_USER_EXIT)
                return 0;
        if (er != EMULATE_DONE)
@@ -5870,7 +5870,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
        gpa_t gpa = cr2;
        kvm_pfn_t pfn;
 
-       if (emulation_type & EMULTYPE_NO_REEXECUTE)
+       if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+               return false;
+
+       if (WARN_ON_ONCE(is_guest_mode(vcpu)))
                return false;
 
        if (!vcpu->arch.mmu.direct_map) {
@@ -5958,7 +5961,10 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
         */
        vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
 
-       if (!(emulation_type & EMULTYPE_RETRY))
+       if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+               return false;
+
+       if (WARN_ON_ONCE(is_guest_mode(vcpu)))
                return false;
 
        if (x86_page_table_writing_insn(ctxt))
@@ -6276,7 +6282,19 @@ restart:
 
        return r;
 }
-EXPORT_SYMBOL_GPL(x86_emulate_instruction);
+
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
+{
+       return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
+
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+                                       void *insn, int insn_len)
+{
+       return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
 
 static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
                            unsigned short port)
@@ -7734,7 +7752,7 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
        int r;
        vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-       r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+       r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        if (r != EMULATE_DONE)
                return 0;
index 257f27620bc272e3312295714a120de07963441f..67b9568613f34abdcacc208b4f23ef1414a512eb 100644 (file)
@@ -274,6 +274,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
                                          int page_num);
 bool kvm_vector_hashing_enabled(void);
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
+                           int emulation_type, void *insn, int insn_len);
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
                                | XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
index 56c4b3f8a01beaa414e2526020dcfcfbfc142b9b..439b8a27488d371fe323f879ba225650df23a359 100755 (executable)
@@ -759,12 +759,18 @@ class DebugfsProvider(Provider):
             if len(vms) == 0:
                 self.do_read = False
 
-            self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
+            self.paths = list(filter(lambda x: "{}-".format(pid) in x, vms))
 
         else:
             self.paths = []
             self.do_read = True
-        self.reset()
+
+    def _verify_paths(self):
+        """Remove invalid paths"""
+        for path in self.paths:
+            if not os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path)):
+                self.paths.remove(path)
+                continue
 
     def read(self, reset=0, by_guest=0):
         """Returns a dict with format:'file name / field -> current value'.
@@ -780,6 +786,7 @@ class DebugfsProvider(Provider):
         # If no debugfs filtering support is available, then don't read.
         if not self.do_read:
             return results
+        self._verify_paths()
 
         paths = self.paths
         if self._pid == 0:
@@ -1096,15 +1103,16 @@ class Tui(object):
             pid = self.stats.pid_filter
         self.screen.erase()
         gname = self.get_gname_from_pid(pid)
+        self._gname = gname
         if gname:
             gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
                                    if len(gname) > MAX_GUEST_NAME_LEN
                                    else gname))
         if pid > 0:
-            self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
-                               .format(pid, gname), curses.A_BOLD)
+            self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
         else:
-            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
+            self._headline = 'kvm statistics - summary'
+        self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
         if self.stats.fields_filter:
             regex = self.stats.fields_filter
             if len(regex) > MAX_REGEX_LEN:
@@ -1162,6 +1170,19 @@ class Tui(object):
 
             return sorted_items
 
+        if not self._is_running_guest(self.stats.pid_filter):
+            if self._gname:
+                try: # ...to identify the guest by name in case it's back
+                    pids = self.get_pid_from_gname(self._gname)
+                    if len(pids) == 1:
+                        self._refresh_header(pids[0])
+                        self._update_pid(pids[0])
+                        return
+                except:
+                    pass
+            self._display_guest_dead()
+            # leave final data on screen
+            return
         row = 3
         self.screen.move(row, 0)
         self.screen.clrtobot()
@@ -1184,6 +1205,7 @@ class Tui(object):
         # print events
         tavg = 0
         tcur = 0
+        guest_removed = False
         for key, values in get_sorted_events(self, stats):
             if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
                 break
@@ -1191,7 +1213,10 @@ class Tui(object):
                 key = self.get_gname_from_pid(key)
                 if not key:
                     continue
-            cur = int(round(values.delta / sleeptime)) if values.delta else ''
+            cur = int(round(values.delta / sleeptime)) if values.delta else 0
+            if cur < 0:
+                guest_removed = True
+                continue
             if key[0] != ' ':
                 if values.delta:
                     tcur += values.delta
@@ -1204,13 +1229,21 @@ class Tui(object):
                                values.value * 100 / float(ltotal), cur))
             row += 1
         if row == 3:
-            self.screen.addstr(4, 1, 'No matching events reported yet')
+            if guest_removed:
+                self.screen.addstr(4, 1, 'Guest removed, updating...')
+            else:
+                self.screen.addstr(4, 1, 'No matching events reported yet')
         if row > 4:
             tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
             self.screen.addstr(row, 1, '%-40s %10d        %8s' %
                                ('Total', total, tavg), curses.A_BOLD)
         self.screen.refresh()
 
+    def _display_guest_dead(self):
+        tag = '   Guest is DEAD   '  # drawn on row 0, at the headline's end
+        col = min(80 - len(tag), len(self._headline))
+        self.screen.addstr(0, col, tag, curses.A_STANDOUT | curses.A_BLINK)
+
     def _show_msg(self, text):
         """Display message centered text and exit on key press"""
         hint = 'Press any key to continue'
@@ -1219,10 +1252,10 @@ class Tui(object):
         (x, term_width) = self.screen.getmaxyx()
         row = 2
         for line in text:
-            start = (term_width - len(line)) / 2
+            start = (term_width - len(line)) // 2
             self.screen.addstr(row, start, line)
             row += 1
-        self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint,
+        self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
                            curses.A_STANDOUT)
         self.screen.getkey()
 
@@ -1319,6 +1352,12 @@ class Tui(object):
                 msg = '"' + str(val) + '": Invalid value'
         self._refresh_header()
 
+    def _is_running_guest(self, pid):
+        """Return True for a falsy pid, else whether /proc/<pid> is a dir."""
+        if pid:
+            return os.path.isdir(os.path.join('/proc/', str(pid)))
+        return True
+
     def _show_vm_selection_by_guest(self):
         """Draws guest selection mask.
 
@@ -1346,7 +1385,7 @@ class Tui(object):
             if not guest or guest == '0':
                 break
             if guest.isdigit():
-                if not os.path.isdir(os.path.join('/proc/', guest)):
+                if not self._is_running_guest(guest):
                     msg = '"' + guest + '": Not a running process'
                     continue
                 pid = int(guest)
index 91aaf73b00df8a385a28c2a221505b6ba82062b1..ed162a6c57c597d89a7f08600b84a4425feb0b13 100644 (file)
@@ -1817,18 +1817,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *dat
        return 0;
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-       unsigned long end = hva + PAGE_SIZE;
-
-       if (!kvm->arch.pgd)
-               return 0;
-
-       trace_kvm_unmap_hva(hva);
-       handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
-       return 0;
-}
-
 int kvm_unmap_hva_range(struct kvm *kvm,
                        unsigned long start, unsigned long end)
 {
@@ -1860,13 +1848,20 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
        unsigned long end = hva + PAGE_SIZE;
+       kvm_pfn_t pfn = pte_pfn(pte);
        pte_t stage2_pte;
 
        if (!kvm->arch.pgd)
                return;
 
        trace_kvm_set_spte_hva(hva);
-       stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
+
+       /*
+        * We've moved a page around, probably through CoW, so let's treat it
+        * just like a translation fault and clean the cache to the PoC.
+        */
+       clean_dcache_guest_page(pfn, PAGE_SIZE);
+       stage2_pte = pfn_pte(pfn, PAGE_S2);
        handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 }
 
index e53b596f483b99f0a7f9d897938b30f47a079316..57b3edebbb4043e7cbeef0f834ccc2a1848caf78 100644 (file)
@@ -134,21 +134,6 @@ TRACE_EVENT(kvm_mmio_emulate,
                  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
 );
 
-TRACE_EVENT(kvm_unmap_hva,
-       TP_PROTO(unsigned long hva),
-       TP_ARGS(hva),
-
-       TP_STRUCT__entry(
-               __field(        unsigned long,  hva             )
-       ),
-
-       TP_fast_assign(
-               __entry->hva            = hva;
-       ),
-
-       TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
-);
-
 TRACE_EVENT(kvm_unmap_hva_range,
        TP_PROTO(unsigned long start, unsigned long end),
        TP_ARGS(start, end),