Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 22 Aug 2018 20:52:44 +0000 (13:52 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 22 Aug 2018 20:52:44 +0000 (13:52 -0700)
Pull second set of KVM updates from Paolo Bonzini:
 "ARM:
   - Support for Group0 interrupts in guests
   - Cache management optimizations for ARMv8.4 systems
   - Userspace interface for RAS
   - Fault path optimization
   - Emulated physical timer fixes
   - Random cleanups

  x86:
   - fixes for L1TF
   - a new test case
   - non-support for SGX (inject the right exception in the guest)
   - fix lockdep false positive"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits)
  KVM: VMX: fixes for vmentry_l1d_flush module parameter
  kvm: selftest: add dirty logging test
  kvm: selftest: pass in extra memory when create vm
  kvm: selftest: include the tools headers
  kvm: selftest: unify the guest port macros
  tools: introduce test_and_clear_bit
  KVM: x86: SVM: Call x86_spec_ctrl_set_guest/host() with interrupts disabled
  KVM: vmx: Inject #UD for SGX ENCLS instruction in guest
  KVM: vmx: Add defines for SGX ENCLS exiting
  x86/kvm/vmx: Fix coding style in vmx_setup_l1d_flush()
  x86: kvm: avoid unused variable warning
  KVM: Documentation: rename the capability of KVM_CAP_ARM_SET_SERROR_ESR
  KVM: arm/arm64: Skip updating PTE entry if no change
  KVM: arm/arm64: Skip updating PMD entry if no change
  KVM: arm: Use true and false for boolean values
  KVM: arm/arm64: vgic: Do not use spin_lock_irqsave/restore with irq disabled
  KVM: arm/arm64: vgic: Move DEBUG_SPINLOCK_BUG_ON to vgic.h
  KVM: arm: vgic-v3: Add support for ICC_SGI0R and ICC_ASGI1R accesses
  KVM: arm64: vgic-v3: Add support for ICC_SGI0R_EL1 and ICC_ASGI1R_EL1 accesses
  KVM: arm/arm64: vgic-v3: Add core support for Group0 SGIs
  ...

59 files changed:
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/devices/arm-vgic-v3.txt
Documentation/virtual/kvm/devices/arm-vgic.txt
arch/arm/include/asm/kvm_emulate.h
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_mmu.h
arch/arm/include/uapi/asm/kvm.h
arch/arm/kvm/coproc.c
arch/arm/kvm/guest.c
arch/arm64/include/asm/cpucaps.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/pgtable-prot.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/kernel/cpufeature.c
arch/arm64/kvm/guest.c
arch/arm64/kvm/hyp-init.S
arch/arm64/kvm/hyp/sysreg-sr.c
arch/arm64/kvm/inject_fault.c
arch/arm64/kvm/reset.c
arch/arm64/kvm/sys_regs.c
arch/x86/include/asm/vmx.h
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
include/kvm/arm_vgic.h
include/linux/irqchip/arm-gic-v3.h
include/linux/irqchip/arm-gic.h
include/uapi/linux/kvm.h
tools/include/linux/bitmap.h
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
tools/testing/selftests/kvm/dirty_log_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/include/test_util.h
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/x86.c
tools/testing/selftests/kvm/set_sregs_test.c
tools/testing/selftests/kvm/state_test.c
tools/testing/selftests/kvm/sync_regs_test.c
tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
virt/kvm/arm/arch_timer.c
virt/kvm/arm/arm.c
virt/kvm/arm/mmu.c
virt/kvm/arm/vgic/vgic-debug.c
virt/kvm/arm/vgic/vgic-init.c
virt/kvm/arm/vgic/vgic-its.c
virt/kvm/arm/vgic/vgic-mmio-v2.c
virt/kvm/arm/vgic/vgic-mmio-v3.c
virt/kvm/arm/vgic/vgic-mmio.c
virt/kvm/arm/vgic/vgic-mmio.h
virt/kvm/arm/vgic/vgic-v2.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/arm/vgic/vgic.c
virt/kvm/arm/vgic/vgic.h

index 7b83b176c662ce24a014299a69be27e28452a14d..c664064f76fb60ccc434988266776697660d0a5f 100644 (file)
@@ -835,11 +835,13 @@ struct kvm_clock_data {
 
 Capability: KVM_CAP_VCPU_EVENTS
 Extended by: KVM_CAP_INTR_SHADOW
-Architectures: x86
-Type: vm ioctl
+Architectures: x86, arm, arm64
+Type: vcpu ioctl
 Parameters: struct kvm_vcpu_event (out)
 Returns: 0 on success, -1 on error
 
+X86:
+
 Gets currently pending exceptions, interrupts, and NMIs as well as related
 states of the vcpu.
 
@@ -881,15 +883,64 @@ Only two fields are defined in the flags field:
 - KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that
   smi contains a valid state.
 
+ARM/ARM64:
+
+If the guest accesses a device that is being emulated by the host kernel in
+such a way that a real device would generate a physical SError, KVM may make
+a virtual SError pending for that VCPU. This system error interrupt remains
+pending until the guest takes the exception by unmasking PSTATE.A.
+
+Running the VCPU may cause it to take a pending SError, or make an access that
+causes an SError to become pending. The event's description is only valid while
+the VCPU is not running.
+
+This API provides a way to read and write the pending 'event' state that is not
+visible to the guest. To save, restore or migrate a VCPU the struct representing
+the state can be read then written using this GET/SET API, along with the other
+guest-visible registers. It is not possible to 'cancel' an SError that has been
+made pending.
+
+A device being emulated in user-space may also wish to generate an SError. To do
+this, the events structure can be populated by user-space. The current state
+should be read first, to ensure no existing SError is pending. If an existing
+SError is pending, the architecture's 'Multiple SError interrupts' rules should
+be followed. (2.5.3 of DDI0587.a "ARM Reliability, Availability, and
+Serviceability (RAS) Specification").
+
+SError exceptions always have an ESR value. Some CPUs have the ability to
+specify what the virtual SError's ESR value should be. These systems will
+advertise KVM_CAP_ARM_INJECT_SERROR_ESR. In this case exception.has_esr will
+always have a non-zero value when read, and the agent making an SError pending
+should specify the ISS field in the lower 24 bits of exception.serror_esr. If
+the system supports KVM_CAP_ARM_INJECT_SERROR_ESR, but user-space sets the events
+with exception.has_esr as zero, KVM will choose an ESR.
+
+Specifying exception.has_esr on a system that does not support it will return
+-EINVAL. Setting anything other than the lower 24 bits of exception.serror_esr
+will return -EINVAL.
+
+struct kvm_vcpu_events {
+       struct {
+               __u8 serror_pending;
+               __u8 serror_has_esr;
+               /* Align it to 8 bytes */
+               __u8 pad[6];
+               __u64 serror_esr;
+       } exception;
+       __u32 reserved[12];
+};
+
 4.32 KVM_SET_VCPU_EVENTS
 
 Capability: KVM_CAP_VCPU_EVENTS
 Extended by: KVM_CAP_INTR_SHADOW
-Architectures: x86
-Type: vm ioctl
+Architectures: x86, arm, arm64
+Type: vcpu ioctl
 Parameters: struct kvm_vcpu_event (in)
 Returns: 0 on success, -1 on error
 
+X86:
+
 Set pending exceptions, interrupts, and NMIs as well as related states of the
 vcpu.
 
@@ -910,6 +961,13 @@ shall be written into the VCPU.
 
 KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available.
 
+ARM/ARM64:
+
+Set the pending SError exception state for this VCPU. It is not possible to
+'cancel' an SError that has been made pending.
+
+See KVM_GET_VCPU_EVENTS for the data structure.
+
 
 4.33 KVM_GET_DEBUGREGS
 
@@ -4690,3 +4748,17 @@ This capability indicates that KVM supports paravirtualized Hyper-V TLB Flush
 hypercalls:
 HvFlushVirtualAddressSpace, HvFlushVirtualAddressSpaceEx,
 HvFlushVirtualAddressList, HvFlushVirtualAddressListEx.
+
+8.19 KVM_CAP_ARM_INJECT_SERROR_ESR
+
+Architectures: arm, arm64
+
+This capability indicates that userspace can specify (via the
+KVM_SET_VCPU_EVENTS ioctl) the syndrome value reported to the guest when it
+takes a virtual SError interrupt exception.
+If KVM advertises this capability, userspace can only specify the ISS field for
+the ESR syndrome. Other parts of the ESR, such as the EC, are generated by the
+CPU when the exception is taken. If this virtual SError is taken to EL1 using
+AArch64, this value will be reported in the ISS field of ESR_ELx.
+
+See KVM_CAP_VCPU_EVENTS for more details.
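
As an illustration of the SError event interface documented above, a minimal
userspace sketch could look as follows. This assumes vm_fd and vcpu_fd were
obtained via KVM_CREATE_VM/KVM_CREATE_VCPU; the helper name and the ISS value
passed in are purely illustrative, and error handling is abbreviated.

/*
 * Sketch only: make a virtual SError pending for a vCPU from userspace,
 * using KVM_GET/SET_VCPU_EVENTS as described in the documentation above.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int pend_serror(int vm_fd, int vcpu_fd, __u64 iss)
{
	struct kvm_vcpu_events events;

	memset(&events, 0, sizeof(events));

	/* Read the current state first: an SError may already be pending. */
	if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0)
		return -1;
	if (events.exception.serror_pending)
		return 0;	/* follow the RAS multiple-SError rules instead */

	events.exception.serror_pending = 1;

	/* Only specify an ESR if the host advertises the capability. */
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_INJECT_SERROR_ESR) > 0) {
		events.exception.serror_has_esr = 1;
		/* Only the lower 24 bits (the ISS field) may be set. */
		events.exception.serror_esr = iss & 0xffffff;
	}

	return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}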
index 2408ab720ef7bae0bcd4736bc30ee475fd00e8ce..ff290b43c8e513175990d76337653299f87d7a93 100644 (file)
@@ -100,6 +100,14 @@ Groups:
     Note that distributor fields are not banked, but return the same value
     regardless of the mpidr used to access the register.
 
+    GICD_IIDR.Revision is updated when the KVM implementation is changed in a
+    way directly observable by the guest or userspace.  Userspace should read
+    GICD_IIDR from KVM and write back the read value to confirm its expected
+    behavior is aligned with the KVM implementation.  Userspace should set
+    GICD_IIDR before setting any other registers to ensure the expected
+    behavior.
+
+
     The GICD_STATUSR and GICR_STATUSR registers are architecturally defined such
     that a write of a clear bit has no effect, whereas a write with a set bit
     clears that value.  To allow userspace to freely set the values of these two
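
The read-then-write-back handshake described above could look roughly like
this from userspace. This is a sketch under a few assumptions: gic_fd is a
GICv3 device fd from KVM_CREATE_DEVICE(KVM_DEV_TYPE_ARM_VGIC_V3), the
KVM_DEV_ARM_VGIC_GRP_DIST_REGS attr encoding places the MPIDR in bits 63:32
and the byte offset in bits 31:0, GICD_IIDR sits at its architectural offset
0x0008, all VCPUs are stopped, and the helper name is made up.

#include <sys/ioctl.h>
#include <linux/kvm.h>

#define GICD_IIDR_OFFSET	0x0008	/* architectural GICD_IIDR byte offset */

static int vgic_v3_sync_iidr(int gic_fd)
{
	__u32 iidr;
	struct kvm_device_attr attr = {
		.group	= KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
		.attr	= GICD_IIDR_OFFSET,	/* MPIDR (bits 63:32) left as 0 */
		.addr	= (__u64)(unsigned long)&iidr,
	};

	/* Read the emulated GICD_IIDR ... */
	if (ioctl(gic_fd, KVM_GET_DEVICE_ATTR, &attr) < 0)
		return -1;

	/* ... and write the value straight back to pin that revision. */
	return ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &attr);
}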
index b3ce12643553f3edbe327029bbfe61ecaa8cc506..97b6518148f87befcc2dab5fb250459607154525 100644 (file)
@@ -49,9 +49,15 @@ Groups:
     index is specified with the vcpu_index field.  Note that most distributor
     fields are not banked, but return the same value regardless of the
     vcpu_index used to access the register.
-  Limitations:
-    - Priorities are not implemented, and registers are RAZ/WI
-    - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
+
+    GICD_IIDR.Revision is updated when the KVM implementation of an emulated
+    GICv2 is changed in a way directly observable by the guest or userspace.
+    Userspace should read GICD_IIDR from KVM and write back the read value to
+    confirm its expected behavior is aligned with the KVM implementation.
+    Userspace should set GICD_IIDR before setting any other registers (both
+    KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS) to ensure
+    the expected behavior. Unless GICD_IIDR has been set from userspace, writes
+    to the interrupt group registers (GICD_IGROUPR) are ignored.
   Errors:
     -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
@@ -94,9 +100,6 @@ Groups:
     use the lower 5 bits to communicate with the KVM device and must shift the
     value left by 3 places to obtain the actual priority mask level.
 
-  Limitations:
-    - Priorities are not implemented, and registers are RAZ/WI
-    - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
   Errors:
     -ENXIO: Getting or setting this register is not yet supported
     -EBUSY: One or more VCPUs are running
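
For the GICv2 case, the ordering requirement above (write GICD_IIDR before
GICD_IGROUPR) might be honoured during restore roughly as follows. This is a
sketch only, assuming the documented attr layout for this group (vcpu_index in
bits 39:32, byte offset in bits 31:0), register values saved by an earlier
KVM_GET_DEVICE_ATTR pass, and made-up helper names.

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int vgic_v2_restore_groups(int gic_fd, __u32 iidr, __u32 igroup0)
{
	struct kvm_device_attr attr = {
		.group	= KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
		.attr	= 0x008,			/* GICD_IIDR first */
		.addr	= (__u64)(unsigned long)&iidr,
	};

	if (ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
		return -1;

	/* Group registers are only honoured once GICD_IIDR has been set. */
	attr.attr = 0x080;				/* GICD_IGROUPR0 */
	attr.addr = (__u64)(unsigned long)&igroup0;

	return ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &attr);
}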
index fe2fb1ddd7715bc55182f20f47cd4bd35d40a88d..77121b713bef3adbc4b2c77c559dab1886dec96b 100644 (file)
@@ -107,9 +107,19 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
        return (unsigned long *)&vcpu->arch.hcr;
 }
 
+static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.hcr &= ~HCR_TWE;
+}
+
+static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.hcr |= HCR_TWE;
+}
+
 static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu)
 {
-       return 1;
+       return true;
 }
 
 static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu)
index 1f1fe4109b026690ab80ca0d3e31355f77e78f04..79906cecb091e4fbf8dafb850d9d4b7ea19c3c41 100644 (file)
@@ -216,6 +216,11 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 unsigned long kvm_call_hyp(void *hypfn, ...);
 void force_vm_exit(const cpumask_t *mask);
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+                             struct kvm_vcpu_events *events);
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+                             struct kvm_vcpu_events *events);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
index 8553d68b7c8a2ac34ec7b22daa44400c1c3b5958..265ea9cf7df773bf7d49926d3eed34be25032463 100644 (file)
@@ -75,17 +75,9 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
-static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd)
-{
-       *pmd = new_pmd;
-       dsb(ishst);
-}
-
-static inline void kvm_set_pte(pte_t *pte, pte_t new_pte)
-{
-       *pte = new_pte;
-       dsb(ishst);
-}
+#define kvm_mk_pmd(ptep)       __pmd(__pa(ptep) | PMD_TYPE_TABLE)
+#define kvm_mk_pud(pmdp)       __pud(__pa(pmdp) | PMD_TYPE_TABLE)
+#define kvm_mk_pgd(pudp)       ({ BUILD_BUG(); 0; })
 
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
index 16e006f708ca0cbd44a63135bb996b8db7c3ba9e..4602464ebdfbfccd9296593ed5dd8b15f55305ff 100644 (file)
@@ -27,6 +27,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -125,6 +126,18 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+       struct {
+               __u8 serror_pending;
+               __u8 serror_has_esr;
+               /* Align it to 8 bytes */
+               __u8 pad[6];
+               __u64 serror_esr;
+       } exception;
+       __u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK                0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT       16
index 3a02e76699a635849d9a3bc1e95d3ef3b76387a5..450c7a4fbc8a15b10bfd0a7e73b0878291e392c9 100644 (file)
@@ -246,6 +246,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
                           const struct coproc_reg *r)
 {
        u64 reg;
+       bool g1;
 
        if (!p->is_write)
                return read_from_write_only(vcpu, p);
@@ -253,7 +254,25 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
        reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32;
        reg |= *vcpu_reg(vcpu, p->Rt1) ;
 
-       vgic_v3_dispatch_sgi(vcpu, reg);
+       /*
+        * In a system where GICD_CTLR.DS=1, a ICC_SGI0R access generates
+        * Group0 SGIs only, while ICC_SGI1R can generate either group,
+        * depending on the SGI configuration. ICC_ASGI1R is effectively
+        * equivalent to ICC_SGI0R, as there is no "alternative" secure
+        * group.
+        */
+       switch (p->Op1) {
+       default:                /* Keep GCC quiet */
+       case 0:                 /* ICC_SGI1R */
+               g1 = true;
+               break;
+       case 1:                 /* ICC_ASGI1R */
+       case 2:                 /* ICC_SGI0R */
+               g1 = false;
+               break;
+       }
+
+       vgic_v3_dispatch_sgi(vcpu, reg, g1);
 
        return true;
 }
@@ -459,6 +478,10 @@ static const struct coproc_reg cp15_regs[] = {
 
        /* ICC_SGI1R */
        { CRm64(12), Op1( 0), is64, access_gic_sgi},
+       /* ICC_ASGI1R */
+       { CRm64(12), Op1( 1), is64, access_gic_sgi},
+       /* ICC_SGI0R */
+       { CRm64(12), Op1( 2), is64, access_gic_sgi},
 
        /* VBAR: swapped by interrupt.S. */
        { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32,
index a18f33edc471a92fcf6194dbe6601bc4b94f1988..2b8de885b2bf6b8e61ffd1c67adf72a56167633a 100644 (file)
@@ -261,6 +261,29 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        return -EINVAL;
 }
 
+
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+                             struct kvm_vcpu_events *events)
+{
+       events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA);
+
+       return 0;
+}
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+                             struct kvm_vcpu_events *events)
+{
+       bool serror_pending = events->exception.serror_pending;
+       bool has_esr = events->exception.serror_has_esr;
+
+       if (serror_pending && has_esr)
+               return -EINVAL;
+       else if (serror_pending)
+               kvm_inject_vabt(vcpu);
+
+       return 0;
+}
+
 int __attribute_const__ kvm_target_cpu(void)
 {
        switch (read_cpuid_part()) {
index be3bf3d08916ca811b9294c890adbae1f91b9783..ae1f70450fb2129b5c195be0678ae9935bb3a450 100644 (file)
@@ -50,7 +50,8 @@
 #define ARM64_HW_DBM                           29
 #define ARM64_SSBD                             30
 #define ARM64_MISMATCHED_CACHE_TYPE            31
+#define ARM64_HAS_STAGE2_FWB                   32
 
-#define ARM64_NCAPS                            32
+#define ARM64_NCAPS                            33
 
 #endif /* __ASM_CPUCAPS_H */
index 6dd285e979c98bc0593dee460c68d12d3d5f0b22..aa45df752a166a56a3ace4808db6b985ac8d2898 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/types.h>
 
 /* Hyp Configuration Register (HCR) bits */
+#define HCR_FWB                (UL(1) << 46)
 #define HCR_TEA                (UL(1) << 37)
 #define HCR_TERR       (UL(1) << 36)
 #define HCR_TLOR       (UL(1) << 35)
index 0c97e45d1dc3e2e54946f5d9f59c92b80ec93485..6106a85ae0be70f91f8ad7b64bd7723e236843e5 100644 (file)
@@ -63,6 +63,8 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
                /* trap error record accesses */
                vcpu->arch.hcr_el2 |= HCR_TERR;
        }
+       if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+               vcpu->arch.hcr_el2 |= HCR_FWB;
 
        if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
                vcpu->arch.hcr_el2 &= ~HCR_RW;
@@ -81,6 +83,21 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
        return (unsigned long *)&vcpu->arch.hcr_el2;
 }
 
+static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.hcr_el2 &= ~HCR_TWE;
+}
+
+static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.hcr_el2 |= HCR_TWE;
+}
+
+static inline unsigned long vcpu_get_vsesr(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.vsesr_el2;
+}
+
 static inline void vcpu_set_vsesr(struct kvm_vcpu *vcpu, u64 vsesr)
 {
        vcpu->arch.vsesr_el2 = vsesr;
index fe8777b12f8667c2c0b23952057fc13041276442..f26055f2306e1f9a479417507c2edf428c9f99de 100644 (file)
@@ -350,6 +350,11 @@ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
 int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
 int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+                             struct kvm_vcpu_events *events);
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+                             struct kvm_vcpu_events *events);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
@@ -378,16 +383,23 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
+
 struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
 
-void __kvm_set_tpidr_el2(u64 tpidr_el2);
 DECLARE_PER_CPU(kvm_cpu_context_t, kvm_host_cpu_state);
 
 static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
                                       unsigned long hyp_stack_ptr,
                                       unsigned long vector_ptr)
 {
-       u64 tpidr_el2;
+       /*
+        * Calculate the raw per-cpu offset without a translation from the
+        * kernel's mapping to the linear mapping, and store it in tpidr_el2
+        * so that we can use adr_l to access per-cpu variables in EL2.
+        */
+       u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_cpu_state) -
+                        (u64)kvm_ksym_ref(kvm_host_cpu_state));
 
        /*
         * Call initialization code, and switch to the full blown HYP code.
@@ -396,17 +408,7 @@ static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr,
         * cpus_have_const_cap() wrapper.
         */
        BUG_ON(!static_branch_likely(&arm64_const_caps_ready));
-       __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr);
-
-       /*
-        * Calculate the raw per-cpu offset without a translation from the
-        * kernel's mapping to the linear mapping, and store it in tpidr_el2
-        * so that we can use adr_l to access per-cpu variables in EL2.
-        */
-       tpidr_el2 = (u64)this_cpu_ptr(&kvm_host_cpu_state)
-               - (u64)kvm_ksym_ref(kvm_host_cpu_state);
-
-       kvm_call_hyp(__kvm_set_tpidr_el2, tpidr_el2);
+       __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2);
 }
 
 static inline bool kvm_arch_check_sve_has_vhe(void)
index fb9a7127bb752d6e627e873962cf39d6b1752687..d6fff7de5539f22ffa91202c587e13ee48d7d4de 100644 (file)
@@ -169,8 +169,12 @@ phys_addr_t kvm_get_idmap_vector(void);
 int kvm_mmu_init(void);
 void kvm_clear_hyp_idmap(void);
 
-#define        kvm_set_pte(ptep, pte)          set_pte(ptep, pte)
-#define        kvm_set_pmd(pmdp, pmd)          set_pmd(pmdp, pmd)
+#define kvm_mk_pmd(ptep)                                       \
+       __pmd(__phys_to_pmd_val(__pa(ptep)) | PMD_TYPE_TABLE)
+#define kvm_mk_pud(pmdp)                                       \
+       __pud(__phys_to_pud_val(__pa(pmdp)) | PMD_TYPE_TABLE)
+#define kvm_mk_pgd(pudp)                                       \
+       __pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE)
 
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
@@ -267,6 +271,15 @@ static inline void __clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size)
 {
        void *va = page_address(pfn_to_page(pfn));
 
+       /*
+        * With FWB, we ensure that the guest always accesses memory using
+        * cacheable attributes, and we don't have to clean to PoC when
+        * faulting in pages. Furthermore, FWB implies IDC, so cleaning to
+        * PoU is not required either in this case.
+        */
+       if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+               return;
+
        kvm_flush_dcache_to_poc(va, size);
 }
 
@@ -287,20 +300,26 @@ static inline void __invalidate_icache_guest_page(kvm_pfn_t pfn,
 
 static inline void __kvm_flush_dcache_pte(pte_t pte)
 {
-       struct page *page = pte_page(pte);
-       kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+       if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+               struct page *page = pte_page(pte);
+               kvm_flush_dcache_to_poc(page_address(page), PAGE_SIZE);
+       }
 }
 
 static inline void __kvm_flush_dcache_pmd(pmd_t pmd)
 {
-       struct page *page = pmd_page(pmd);
-       kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+       if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+               struct page *page = pmd_page(pmd);
+               kvm_flush_dcache_to_poc(page_address(page), PMD_SIZE);
+       }
 }
 
 static inline void __kvm_flush_dcache_pud(pud_t pud)
 {
-       struct page *page = pud_page(pud);
-       kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+       if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
+               struct page *page = pud_page(pud);
+               kvm_flush_dcache_to_poc(page_address(page), PUD_SIZE);
+       }
 }
 
 #define kvm_virt_to_phys(x)            __pa_symbol(x)
index 49d99214f43c5011f2187fb5bc12c6d4537cd307..b96442960aead1692e94d28dd35d33e7954d84b3 100644 (file)
 #define MT_S2_NORMAL           0xf
 #define MT_S2_DEVICE_nGnRE     0x1
 
+/*
+ * Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
+ * Stage-2 enforces Normal-WB and Device-nGnRE
+ */
+#define MT_S2_FWB_NORMAL       6
+#define MT_S2_FWB_DEVICE_nGnRE 1
+
 #ifdef CONFIG_ARM64_4K_PAGES
 #define IOREMAP_MAX_ORDER      (PUD_SHIFT)
 #else
index 108ecad7acc5617aa3c6c7573ba578b9e485f78d..78b942c1bea437c5a0e9124b86afd4c48503d999 100644 (file)
 #define PAGE_HYP_RO            __pgprot(_HYP_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN)
 #define PAGE_HYP_DEVICE                __pgprot(PROT_DEVICE_nGnRE | PTE_HYP)
 
-#define PAGE_S2                        __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY | PTE_S2_XN)
-#define PAGE_S2_DEVICE         __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_S2_XN)
+#define PAGE_S2_MEMATTR(attr)                                          \
+       ({                                                              \
+               u64 __val;                                              \
+               if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))          \
+                       __val = PTE_S2_MEMATTR(MT_S2_FWB_ ## attr);     \
+               else                                                    \
+                       __val = PTE_S2_MEMATTR(MT_S2_ ## attr);         \
+               __val;                                                  \
+        })
+
+#define PAGE_S2_XN                                                     \
+       ({                                                              \
+               u64 __val;                                              \
+               if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))           \
+                       __val = 0;                                      \
+               else                                                    \
+                       __val = PTE_S2_XN;                              \
+               __val;                                                  \
+       })
+
+#define PAGE_S2                        __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(NORMAL) | PTE_S2_RDONLY | PAGE_S2_XN)
+#define PAGE_S2_DEVICE         __pgprot(_PROT_DEFAULT | PAGE_S2_MEMATTR(DEVICE_nGnRE) | PTE_S2_RDONLY | PAGE_S2_XN)
 
 #define PAGE_NONE              __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
 #define PAGE_SHARED            __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE)
index e205ec8489e9c04f46de172585d679a28d6ae418..c1470931b8974936ed2a86fb231c15764d08f573 100644 (file)
 #define SYS_ICC_DIR_EL1                        sys_reg(3, 0, 12, 11, 1)
 #define SYS_ICC_RPR_EL1                        sys_reg(3, 0, 12, 11, 3)
 #define SYS_ICC_SGI1R_EL1              sys_reg(3, 0, 12, 11, 5)
+#define SYS_ICC_ASGI1R_EL1             sys_reg(3, 0, 12, 11, 6)
+#define SYS_ICC_SGI0R_EL1              sys_reg(3, 0, 12, 11, 7)
 #define SYS_ICC_IAR1_EL1               sys_reg(3, 0, 12, 12, 0)
 #define SYS_ICC_EOIR1_EL1              sys_reg(3, 0, 12, 12, 1)
 #define SYS_ICC_HPPIR1_EL1             sys_reg(3, 0, 12, 12, 2)
 #define ID_AA64MMFR1_VMIDBITS_16       2
 
 /* id_aa64mmfr2 */
+#define ID_AA64MMFR2_FWB_SHIFT         40
 #define ID_AA64MMFR2_AT_SHIFT          32
 #define ID_AA64MMFR2_LVA_SHIFT         16
 #define ID_AA64MMFR2_IESB_SHIFT                12
index 4e76630dd6554673d71ad647c1108bb54f1bcea2..97c3478ee6e718c8ac2de4c01edee6a0dd4cd27e 100644 (file)
@@ -39,6 +39,7 @@
 #define __KVM_HAVE_GUEST_DEBUG
 #define __KVM_HAVE_IRQ_LINE
 #define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 
@@ -154,6 +155,18 @@ struct kvm_sync_regs {
 struct kvm_arch_memory_slot {
 };
 
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+       struct {
+               __u8 serror_pending;
+               __u8 serror_has_esr;
+               /* Align it to 8 bytes */
+               __u8 pad[6];
+               __u64 serror_esr;
+       } exception;
+       __u32 reserved[12];
+};
+
 /* If you need to interpret the index values, here is the key: */
 #define KVM_REG_ARM_COPROC_MASK                0x000000000FFF0000
 #define KVM_REG_ARM_COPROC_SHIFT       16
index 611e8921c3d4d91ba8d687ff89fcb90757b12555..e238b7932096d5641de25a6f16a9292cc1f3d517 100644 (file)
@@ -192,6 +192,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = {
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0),
@@ -1026,6 +1027,14 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
 }
 #endif
 
+static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused)
+{
+       u64 val = read_sysreg_s(SYS_CLIDR_EL1);
+
+       /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
+       WARN_ON(val & (7 << 27 | 7 << 21));
+}
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
        {
                .desc = "GIC system register CPU interface",
@@ -1182,6 +1191,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
                .matches = has_cache_dic,
        },
+       {
+               .desc = "Stage-2 Force Write-Back",
+               .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+               .capability = ARM64_HAS_STAGE2_FWB,
+               .sys_reg = SYS_ID_AA64MMFR2_EL1,
+               .sign = FTR_UNSIGNED,
+               .field_pos = ID_AA64MMFR2_FWB_SHIFT,
+               .min_field_value = 1,
+               .matches = has_cpuid_feature,
+               .cpu_enable = cpu_has_fwb,
+       },
 #ifdef CONFIG_ARM64_HW_AFDBM
        {
                /*
index cdd4d9d6d57548cc58dbbdaecdfb1e4e18685df8..07256b08226c0c935d7ced6530a4a7a85ee4c276 100644 (file)
@@ -289,6 +289,39 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
        return -EINVAL;
 }
 
+int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+                             struct kvm_vcpu_events *events)
+{
+       events->exception.serror_pending = !!(vcpu->arch.hcr_el2 & HCR_VSE);
+       events->exception.serror_has_esr = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+
+       if (events->exception.serror_pending && events->exception.serror_has_esr)
+               events->exception.serror_esr = vcpu_get_vsesr(vcpu);
+
+       return 0;
+}
+
+int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+                             struct kvm_vcpu_events *events)
+{
+       bool serror_pending = events->exception.serror_pending;
+       bool has_esr = events->exception.serror_has_esr;
+
+       if (serror_pending && has_esr) {
+               if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
+                       return -EINVAL;
+
+               if (!((events->exception.serror_esr) & ~ESR_ELx_ISS_MASK))
+                       kvm_set_sei_esr(vcpu, events->exception.serror_esr);
+               else
+                       return -EINVAL;
+       } else if (serror_pending) {
+               kvm_inject_vabt(vcpu);
+       }
+
+       return 0;
+}
+
 int __attribute_const__ kvm_target_cpu(void)
 {
        unsigned long implementor = read_cpuid_implementor();
index 6fd91b31a131857c9dd70002245f99220adc5a31..ea92251607862db0b33f9ef416ae86e8463b2d03 100644 (file)
@@ -57,6 +57,7 @@ __invalid:
         * x0: HYP pgd
         * x1: HYP stack
         * x2: HYP vectors
+        * x3: per-CPU offset
         */
 __do_hyp_init:
        /* Check for a stub HVC call */
@@ -119,9 +120,8 @@ CPU_BE(     orr     x4, x4, #SCTLR_ELx_EE)
        mov     sp, x1
        msr     vbar_el2, x2
 
-       /* copy tpidr_el1 into tpidr_el2 for use by HYP */
-       mrs     x1, tpidr_el1
-       msr     tpidr_el2, x1
+       /* Set tpidr_el2 for use by HYP */
+       msr     tpidr_el2, x3
 
        /* Hello, World! */
        eret
index 35bc16832efe28ae3e6a85d4e906f1e94024f0c4..9ce223944983b803e3b4161d57a5ff6e17e8ec5b 100644 (file)
@@ -288,8 +288,3 @@ void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu)
 
        vcpu->arch.sysregs_loaded_on_cpu = false;
 }
-
-void __hyp_text __kvm_set_tpidr_el2(u64 tpidr_el2)
-{
-       asm("msr tpidr_el2, %0": : "r" (tpidr_el2));
-}
index d8e71659ba7e1bed0d7d21c94ff0931188b8118d..a55e91dfcf8fe8279c2dbe40c9747d26cb086e26 100644 (file)
@@ -164,9 +164,9 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
                inject_undef64(vcpu);
 }
 
-static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
+void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 esr)
 {
-       vcpu_set_vsesr(vcpu, esr);
+       vcpu_set_vsesr(vcpu, esr & ESR_ELx_ISS_MASK);
        *vcpu_hcr(vcpu) |= HCR_VSE;
 }
 
@@ -184,5 +184,5 @@ static void pend_guest_serror(struct kvm_vcpu *vcpu, u64 esr)
  */
 void kvm_inject_vabt(struct kvm_vcpu *vcpu)
 {
-       pend_guest_serror(vcpu, ESR_ELx_ISV);
+       kvm_set_sei_esr(vcpu, ESR_ELx_ISV);
 }
index 4e4aedaf7ab7152e93983fffc95b65579cd378e5..e37c78bbe1ca9cc750c354412ef64092af5fc5dc 100644 (file)
@@ -77,8 +77,12 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ARM_PMU_V3:
                r = kvm_arm_support_pmu_v3();
                break;
+       case KVM_CAP_ARM_INJECT_SERROR_ESR:
+               r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
+               break;
        case KVM_CAP_SET_GUEST_DEBUG:
        case KVM_CAP_VCPU_ATTRIBUTES:
+       case KVM_CAP_VCPU_EVENTS:
                r = 1;
                break;
        default:
index a4363735d3f8e11e1cac77f57cab32d23fac4915..22fbbdbece3caf19fcc070c736f30956c4419c8c 100644 (file)
@@ -194,7 +194,16 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
        if (!p->is_write)
                return read_from_write_only(vcpu, p, r);
 
-       kvm_set_way_flush(vcpu);
+       /*
+        * Only track S/W ops if we don't have FWB. It still indicates
+        * that the guest is a bit broken (S/W operations should only
+        * be done by firmware, knowing that there is only a single
+        * CPU left in the system, and certainly not from non-secure
+        * software).
+        */
+       if (!cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+               kvm_set_way_flush(vcpu);
+
        return true;
 }
 
@@ -243,10 +252,43 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
                           struct sys_reg_params *p,
                           const struct sys_reg_desc *r)
 {
+       bool g1;
+
        if (!p->is_write)
                return read_from_write_only(vcpu, p, r);
 
-       vgic_v3_dispatch_sgi(vcpu, p->regval);
+       /*
+        * In a system where GICD_CTLR.DS=1, a ICC_SGI0R_EL1 access generates
+        * Group0 SGIs only, while ICC_SGI1R_EL1 can generate either group,
+        * depending on the SGI configuration. ICC_ASGI1R_EL1 is effectively
+        * equivalent to ICC_SGI0R_EL1, as there is no "alternative" secure
+        * group.
+        */
+       if (p->is_aarch32) {
+               switch (p->Op1) {
+               default:                /* Keep GCC quiet */
+               case 0:                 /* ICC_SGI1R */
+                       g1 = true;
+                       break;
+               case 1:                 /* ICC_ASGI1R */
+               case 2:                 /* ICC_SGI0R */
+                       g1 = false;
+                       break;
+               }
+       } else {
+               switch (p->Op2) {
+               default:                /* Keep GCC quiet */
+               case 5:                 /* ICC_SGI1R_EL1 */
+                       g1 = true;
+                       break;
+               case 6:                 /* ICC_ASGI1R_EL1 */
+               case 7:                 /* ICC_SGI0R_EL1 */
+                       g1 = false;
+                       break;
+               }
+       }
+
+       vgic_v3_dispatch_sgi(vcpu, p->regval, g1);
 
        return true;
 }
@@ -1303,6 +1345,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only },
        { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only },
        { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi },
+       { SYS_DESC(SYS_ICC_ASGI1R_EL1), access_gic_sgi },
+       { SYS_DESC(SYS_ICC_SGI0R_EL1), access_gic_sgi },
        { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only },
        { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only },
        { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only },
@@ -1613,8 +1657,6 @@ static const struct sys_reg_desc cp14_64_regs[] = {
  * register).
  */
 static const struct sys_reg_desc cp15_regs[] = {
-       { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
-
        { Op1( 0), CRn( 1), CRm( 0), Op2( 0), access_vm_reg, NULL, c1_SCTLR },
        { Op1( 0), CRn( 2), CRm( 0), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
        { Op1( 0), CRn( 2), CRm( 0), Op2( 1), access_vm_reg, NULL, c2_TTBR1 },
@@ -1737,8 +1779,10 @@ static const struct sys_reg_desc cp15_regs[] = {
 static const struct sys_reg_desc cp15_64_regs[] = {
        { Op1( 0), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR0 },
        { Op1( 0), CRn( 0), CRm( 9), Op2( 0), access_pmu_evcntr },
-       { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi },
+       { Op1( 0), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI1R */
        { Op1( 1), CRn( 0), CRm( 2), Op2( 0), access_vm_reg, NULL, c2_TTBR1 },
+       { Op1( 1), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_ASGI1R */
+       { Op1( 2), CRn( 0), CRm(12), Op2( 0), access_gic_sgi }, /* ICC_SGI0R */
        { Op1( 2), CRn( 0), CRm(14), Op2( 0), access_cntp_cval },
 };
 
index 95f9107449bfb85c11b21b3c255edaeeb7e8ad87..9527ba5d62daba4fe5ca96d9c1ba2986f9612ccc 100644 (file)
@@ -74,6 +74,7 @@
 #define SECONDARY_EXEC_ENABLE_INVPCID          0x00001000
 #define SECONDARY_EXEC_ENABLE_VMFUNC            0x00002000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
+#define SECONDARY_EXEC_ENCLS_EXITING           0x00008000
 #define SECONDARY_EXEC_RDSEED_EXITING          0x00010000
 #define SECONDARY_EXEC_ENABLE_PML               0x00020000
 #define SECONDARY_EXEC_XSAVES                  0x00100000
@@ -213,6 +214,8 @@ enum vmcs_field {
        VMWRITE_BITMAP_HIGH             = 0x00002029,
        XSS_EXIT_BITMAP                 = 0x0000202C,
        XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
+       ENCLS_EXITING_BITMAP            = 0x0000202E,
+       ENCLS_EXITING_BITMAP_HIGH       = 0x0000202F,
        TSC_MULTIPLIER                  = 0x00002032,
        TSC_MULTIPLIER_HIGH             = 0x00002033,
        GUEST_PHYSICAL_ADDRESS          = 0x00002400,
index 73e27a98456fbd9cccf3bc1a2c7982cedb1302f0..6276140044d0848b58e6778c31cf4d247520ac44 100644 (file)
@@ -5586,8 +5586,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
        clgi();
 
-       local_irq_enable();
-
        /*
         * If this vCPU has touched SPEC_CTRL, restore the guest's value if
         * it's non-zero. Since vmentry is serialising on affected CPUs, there
@@ -5596,6 +5594,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
         */
        x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
 
+       local_irq_enable();
+
        asm volatile (
                "push %%" _ASM_BP "; \n\t"
                "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
@@ -5718,12 +5718,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
        if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
                svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
 
-       x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
-
        reload_tss(vcpu);
 
        local_irq_disable();
 
+       x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
+
        vcpu->arch.cr2 = svm->vmcb->save.cr2;
        vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
        vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
index 1519f030fd73198a2dbac4a6a050cf79b587c2cf..8dae47e7267af8c77b6d14766e7b771166ca85d8 100644 (file)
@@ -198,12 +198,14 @@ static enum vmx_l1d_flush_state __read_mostly vmentry_l1d_flush_param = VMENTER_
 
 static const struct {
        const char *option;
-       enum vmx_l1d_flush_state cmd;
+       bool for_parse;
 } vmentry_l1d_param[] = {
-       {"auto",        VMENTER_L1D_FLUSH_AUTO},
-       {"never",       VMENTER_L1D_FLUSH_NEVER},
-       {"cond",        VMENTER_L1D_FLUSH_COND},
-       {"always",      VMENTER_L1D_FLUSH_ALWAYS},
+       [VMENTER_L1D_FLUSH_AUTO]         = {"auto", true},
+       [VMENTER_L1D_FLUSH_NEVER]        = {"never", true},
+       [VMENTER_L1D_FLUSH_COND]         = {"cond", true},
+       [VMENTER_L1D_FLUSH_ALWAYS]       = {"always", true},
+       [VMENTER_L1D_FLUSH_EPT_DISABLED] = {"EPT disabled", false},
+       [VMENTER_L1D_FLUSH_NOT_REQUIRED] = {"not required", false},
 };
 
 #define L1D_CACHE_ORDER 4
@@ -219,15 +221,15 @@ static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
                return 0;
        }
 
-       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
-              u64 msr;
+       if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
+               u64 msr;
 
-              rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
-              if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
-                      l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
-                      return 0;
-              }
-       }
+               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
+               if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+                       l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+                       return 0;
+               }
+       }
 
        /* If set to auto use the default l1tf mitigation method */
        if (l1tf == VMENTER_L1D_FLUSH_AUTO) {
@@ -287,8 +289,9 @@ static int vmentry_l1d_flush_parse(const char *s)
 
        if (s) {
                for (i = 0; i < ARRAY_SIZE(vmentry_l1d_param); i++) {
-                       if (sysfs_streq(s, vmentry_l1d_param[i].option))
-                               return vmentry_l1d_param[i].cmd;
+                       if (vmentry_l1d_param[i].for_parse &&
+                           sysfs_streq(s, vmentry_l1d_param[i].option))
+                               return i;
                }
        }
        return -EINVAL;
@@ -298,13 +301,13 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
 {
        int l1tf, ret;
 
-       if (!boot_cpu_has(X86_BUG_L1TF))
-               return 0;
-
        l1tf = vmentry_l1d_flush_parse(s);
        if (l1tf < 0)
                return l1tf;
 
+       if (!boot_cpu_has(X86_BUG_L1TF))
+               return 0;
+
        /*
         * Has vmx_init() run already? If not then this is the pre init
         * parameter parsing. In that case just store the value and let
@@ -324,6 +327,9 @@ static int vmentry_l1d_flush_set(const char *s, const struct kernel_param *kp)
 
 static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
 {
+       if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
+               return sprintf(s, "???\n");
+
        return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
 }
 
@@ -1684,6 +1690,12 @@ static inline bool cpu_has_vmx_virtual_intr_delivery(void)
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY;
 }
 
+static inline bool cpu_has_vmx_encls_vmexit(void)
+{
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_ENCLS_EXITING;
+}
+
 /*
  * Comment's format: document - errata name - stepping - processor name.
  * Refer from
@@ -4551,7 +4563,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
                        SECONDARY_EXEC_RDRAND_EXITING |
                        SECONDARY_EXEC_ENABLE_PML |
                        SECONDARY_EXEC_TSC_SCALING |
-                       SECONDARY_EXEC_ENABLE_VMFUNC;
+                       SECONDARY_EXEC_ENABLE_VMFUNC |
+                       SECONDARY_EXEC_ENCLS_EXITING;
                if (adjust_vmx_controls(min2, opt2,
                                        MSR_IA32_VMX_PROCBASED_CTLS2,
                                        &_cpu_based_2nd_exec_control) < 0)
@@ -6648,6 +6661,9 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
                vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
                vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
        }
+
+       if (cpu_has_vmx_encls_vmexit())
+               vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
 }
 
 static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -9314,6 +9330,17 @@ fail:
        return 1;
 }
 
+static int handle_encls(struct kvm_vcpu *vcpu)
+{
+       /*
+        * SGX virtualization is not yet supported.  There is no software
+        * enable bit for SGX, so we have to trap ENCLS and inject a #UD
+        * to prevent the guest from executing ENCLS.
+        */
+       kvm_queue_exception(vcpu, UD_VECTOR);
+       return 1;
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -9371,6 +9398,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [EXIT_REASON_INVPCID]                 = handle_invpcid,
        [EXIT_REASON_VMFUNC]                  = handle_vmfunc,
        [EXIT_REASON_PREEMPTION_TIMER]        = handle_preemption_timer,
+       [EXIT_REASON_ENCLS]                   = handle_encls,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -9741,6 +9769,9 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
        case EXIT_REASON_VMFUNC:
                /* VM functions are emulated through L2->L0 vmexits. */
                return false;
+       case EXIT_REASON_ENCLS:
+               /* SGX is never exposed to L1 */
+               return false;
        default:
                return true;
        }
@@ -12101,6 +12132,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
                if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
                        vmcs_write64(APIC_ACCESS_ADDR, -1ull);
 
+               if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
+                       vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
+
                vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
        }
 
index 4a74a7cf0a8bf835800c0cb23d72b17bcb931efa..506bd2b4b8bb76e21a959310d33f6de6180719f8 100644 (file)
@@ -6576,14 +6576,12 @@ static void kvm_set_mmio_spte_mask(void)
        /* Set the present bit. */
        mask |= 1ull;
 
-#ifdef CONFIG_X86_64
        /*
         * If reserved bit is not supported, clear the present bit to disable
         * mmio page fault.
         */
-       if (maxphyaddr == 52)
+       if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52)
                mask &= ~1ull;
-#endif
 
        kvm_mmu_set_mmio_spte_mask(mask, mask);
 }
index cfdd2484cc4263375fbb3ba527a75db2789bbb36..4f31f96bbfabd06aad4691366861946fed570bbf 100644 (file)
@@ -133,6 +133,7 @@ struct vgic_irq {
        u8 source;                      /* GICv2 SGIs only */
        u8 active_source;               /* GICv2 SGIs only */
        u8 priority;
+       u8 group;                       /* 0 == group 0, 1 == group 1 */
        enum vgic_irq_config config;    /* Level or edge */
 
        /*
@@ -217,6 +218,12 @@ struct vgic_dist {
        /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */
        u32                     vgic_model;
 
+       /* Implementation revision as reported in the GICD_IIDR */
+       u32                     implementation_rev;
+
+       /* Userspace can write to GICv2 IGROUPR */
+       bool                    v2_groups_user_writable;
+
        /* Do injected MSIs require an additional device ID? */
        bool                    msis_require_devid;
 
@@ -366,7 +373,7 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu);
 void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid);
 
-void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1);
 
 /**
  * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
index 9d2ea3e907d0fc018d8f7e13d2912537c83c5e70..8bdbb5f29494b494b47dadc8d57943eeb4011612 100644 (file)
 #define GICD_CTLR_ENABLE_G1A           (1U << 1)
 #define GICD_CTLR_ENABLE_G1            (1U << 0)
 
+#define GICD_IIDR_IMPLEMENTER_SHIFT    0
+#define GICD_IIDR_IMPLEMENTER_MASK     (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT       12
+#define GICD_IIDR_REVISION_MASK                (0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT                16
+#define GICD_IIDR_VARIANT_MASK         (0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT     24
+#define GICD_IIDR_PRODUCT_ID_MASK      (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
 /*
  * In systems with a single security state (what we emulate in KVM)
  * the meaning of the interrupt group enable bits is slightly different
index 68d8b1f73682be899af097adf1d95f6452c8a0c8..6c4aaf04046c0f81eb149e8aacfcb813663c5467 100644 (file)
                                        (GICD_INT_DEF_PRI << 8) |\
                                        GICD_INT_DEF_PRI)
 
+#define GICD_IIDR_IMPLEMENTER_SHIFT    0
+#define GICD_IIDR_IMPLEMENTER_MASK     (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT       12
+#define GICD_IIDR_REVISION_MASK                (0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT                16
+#define GICD_IIDR_VARIANT_MASK         (0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT     24
+#define GICD_IIDR_PRODUCT_ID_MASK      (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
 #define GICH_HCR                       0x0
 #define GICH_VTR                       0x4
 #define GICH_VMCR                      0x8
 #define GICH_LR_PENDING_BIT            (1 << 28)
 #define GICH_LR_ACTIVE_BIT             (1 << 29)
 #define GICH_LR_EOI                    (1 << 19)
+#define GICH_LR_GROUP1                 (1 << 30)
 #define GICH_LR_HW                     (1 << 31)
 
 #define GICH_VMCR_ENABLE_GRP0_SHIFT    0
index 3cf632839337b7cf0288971e2612376d52de636d..07548de5c9889f5bcd425a7273b2732003bf3c30 100644 (file)
@@ -951,6 +951,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_HYPERV_TLBFLUSH 155
 #define KVM_CAP_S390_HPAGE_1M 156
 #define KVM_CAP_NESTED_STATE 157
+#define KVM_CAP_ARM_INJECT_SERROR_ESR 158
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
index 63440cc8d618486af1c970499138738b7882485c..e63662db131ba525c36e79232150fc5f62e2f758 100644 (file)
@@ -96,6 +96,23 @@ static inline int test_and_set_bit(int nr, unsigned long *addr)
        return (old & mask) != 0;
 }
 
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ */
+static inline int test_and_clear_bit(int nr, unsigned long *addr)
+{
+       unsigned long mask = BIT_MASK(nr);
+       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+       unsigned long old;
+
+       old = *p;
+       *p = old & ~mask;
+
+       return (old & mask) != 0;
+}
+
 /**
  * bitmap_alloc - Allocate bitmap
  * @nbits: Number of bits
index dd0e5163f01fb7aeb1381581a7e9a64cb781e314..03b0f551bedffc7eecb6fd4452e99de1a8a4b828 100644 (file)
@@ -11,13 +11,16 @@ TEST_GEN_PROGS_x86_64 += sync_regs_test
 TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += state_test
+TEST_GEN_PROGS_x86_64 += dirty_log_test
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
 
 INSTALL_HDR_PATH = $(top_srcdir)/usr
 LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
-CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
+LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
+LDFLAGS += -lpthread
 
 # After inclusion, $(OUTPUT) is defined and
 # $(TEST_GEN_PROGS) starts with $(OUTPUT)/
index 8346b33c2073b9b88e2f4d5aac7eab2709a019a0..11ec358bf96906113c203c36bab921d1e41ea972 100644 (file)
 #define X86_FEATURE_OSXSAVE    (1<<27)
 #define VCPU_ID                        1
 
-enum {
-       GUEST_UPDATE_CR4 = 0x1000,
-       GUEST_FAILED,
-       GUEST_DONE,
-};
-
-static void exit_to_hv(uint16_t port)
-{
-       __asm__ __volatile__("in %[port], %%al"
-                            :
-                            : [port]"d"(port)
-                            : "rax");
-}
-
 static inline bool cr4_cpuid_is_sync(void)
 {
        int func, subfunc;
@@ -64,17 +50,15 @@ static void guest_code(void)
        set_cr4(cr4);
 
        /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
-       if (!cr4_cpuid_is_sync())
-               exit_to_hv(GUEST_FAILED);
+       GUEST_ASSERT(cr4_cpuid_is_sync());
 
        /* notify hypervisor to change CR4 */
-       exit_to_hv(GUEST_UPDATE_CR4);
+       GUEST_SYNC(0);
 
        /* check again */
-       if (!cr4_cpuid_is_sync())
-               exit_to_hv(GUEST_FAILED);
+       GUEST_ASSERT(cr4_cpuid_is_sync());
 
-       exit_to_hv(GUEST_DONE);
+       GUEST_DONE();
 }
 
 int main(int argc, char *argv[])
@@ -95,7 +79,7 @@ int main(int argc, char *argv[])
        setbuf(stdout, NULL);
 
        /* Create VM */
-       vm = vm_create_default(VCPU_ID, guest_code);
+       vm = vm_create_default(VCPU_ID, 0, guest_code);
        vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        run = vcpu_state(vm, VCPU_ID);
 
@@ -104,16 +88,16 @@ int main(int argc, char *argv[])
 
                if (run->exit_reason == KVM_EXIT_IO) {
                        switch (run->io.port) {
-                       case GUEST_UPDATE_CR4:
+                       case GUEST_PORT_SYNC:
                                /* emulate hypervisor clearing CR4.OSXSAVE */
                                vcpu_sregs_get(vm, VCPU_ID, &sregs);
                                sregs.cr4 &= ~X86_CR4_OSXSAVE;
                                vcpu_sregs_set(vm, VCPU_ID, &sregs);
                                break;
-                       case GUEST_FAILED:
+                       case GUEST_PORT_ABORT:
                                TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
                                break;
-                       case GUEST_DONE:
+                       case GUEST_PORT_DONE:
                                goto done;
                        default:
                                TEST_ASSERT(false, "Unknown port 0x%x.",
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
new file mode 100644 (file)
index 0000000..0c2cdc1
--- /dev/null
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define  DEBUG                 printf
+
+#define  VCPU_ID                        1
+/* The memory slot index to track dirty pages */
+#define  TEST_MEM_SLOT_INDEX            1
+/*
+ * GPA offset of the testing memory slot. Must be bigger than the
+ * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES.
+ */
+#define  TEST_MEM_OFFSET                (1ULL << 30) /* 1G */
+/* Size of the testing memory slot */
+#define  TEST_MEM_PAGES                 (1ULL << 18) /* 1G for 4K pages */
+/* How many pages to dirty for each guest loop */
+#define  TEST_PAGES_PER_LOOP            1024
+/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
+#define  TEST_HOST_LOOP_N               32
+/* Interval for each host loop (ms) */
+#define  TEST_HOST_LOOP_INTERVAL        10
+
+/*
+ * Guest variables.  We use these variables to share data between host
+ * and guest.  There are two copies of the variables, one in host memory
+ * (which is unused) and one in guest memory.  When the host wants to
+ * access these variables, it needs to call addr_gva2hva() to access the
+ * guest copy.
+ */
+uint64_t guest_random_array[TEST_PAGES_PER_LOOP];
+uint64_t guest_iteration;
+uint64_t guest_page_size;
+
+/*
+ * Writes to the first byte of a random page within the testing memory
+ * region continuously.
+ */
+void guest_code(void)
+{
+       int i = 0;
+       uint64_t volatile *array = guest_random_array;
+       uint64_t volatile *guest_addr;
+
+       while (true) {
+               for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+                       /*
+                        * Write to the first 8 bytes of a random page
+                        * in the testing memory region.
+                        */
+                       guest_addr = (uint64_t *)
+                           (TEST_MEM_OFFSET +
+                            (array[i] % TEST_MEM_PAGES) * guest_page_size);
+                       *guest_addr = guest_iteration;
+               }
+               /* Tell the host that we need more random numbers */
+               GUEST_SYNC(1);
+       }
+}
+
+/*
+ * Host variables.  These variables should only be used by the host
+ * rather than the guest.
+ */
+bool host_quit;
+
+/* Points to the test VM memory region on which we track dirty logs */
+void *host_test_mem;
+
+/* For statistics only */
+uint64_t host_dirty_count;
+uint64_t host_clear_count;
+uint64_t host_track_next_count;
+
+/*
+ * We use this bitmap to track pages that should have their dirty bit
+ * set in the _next_ iteration.  For example, if we detect that a page's
+ * value has changed to the current iteration number but its bit is
+ * clear in the latest dirty bitmap, then the system must report that
+ * write in the next get-dirty-log call.
+ */
+unsigned long *host_bmap_track;
+
+void generate_random_array(uint64_t *guest_array, uint64_t size)
+{
+       uint64_t i;
+
+       for (i = 0; i < size; i++) {
+               guest_array[i] = random();
+       }
+}
+
+void *vcpu_worker(void *data)
+{
+       int ret;
+       uint64_t loops, *guest_array, pages_count = 0;
+       struct kvm_vm *vm = data;
+       struct kvm_run *run;
+       struct guest_args args;
+
+       run = vcpu_state(vm, VCPU_ID);
+
+       /* Retrieve the guest random array pointer and cache it */
+       guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array);
+
+       DEBUG("VCPU starts\n");
+
+       generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+
+       while (!READ_ONCE(host_quit)) {
+               /* Let the guest dirty these random pages */
+               ret = _vcpu_run(vm, VCPU_ID);
+               guest_args_read(vm, VCPU_ID, &args);
+               if (run->exit_reason == KVM_EXIT_IO &&
+                   args.port == GUEST_PORT_SYNC) {
+                       pages_count += TEST_PAGES_PER_LOOP;
+                       generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+               } else {
+                       TEST_ASSERT(false,
+                                   "Invalid guest sync status: "
+                                   "exit_reason=%s\n",
+                                   exit_reason_str(run->exit_reason));
+               }
+       }
+
+       DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count);
+
+       return NULL;
+}
+
+void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
+{
+       uint64_t page;
+       uint64_t volatile *value_ptr;
+
+       for (page = 0; page < TEST_MEM_PAGES; page++) {
+               value_ptr = host_test_mem + page * getpagesize();
+
+               /* If this is a special page that we were tracking... */
+               if (test_and_clear_bit(page, host_bmap_track)) {
+                       host_track_next_count++;
+                       TEST_ASSERT(test_bit(page, bmap),
+                                   "Page %"PRIu64" should have its dirty bit "
+                                   "set in this iteration but it is missing",
+                                   page);
+               }
+
+               if (test_bit(page, bmap)) {
+                       host_dirty_count++;
+                       /*
+                        * If the bit is set, the value written onto
+                        * the corresponding page should be either the
+                        * previous iteration number or the current one.
+                        */
+                       TEST_ASSERT(*value_ptr == iteration ||
+                                   *value_ptr == iteration - 1,
+                                   "Set page %"PRIu64" value %"PRIu64
+                                   " incorrect (iteration=%"PRIu64")",
+                                   page, *value_ptr, iteration);
+               } else {
+                       host_clear_count++;
+                       /*
+                        * If cleared, the value written can be any
+                        * value smaller than or equal to the iteration
+                        * number.  Note that the value can be exactly
+                        * (iteration-1) if the write happens like
+                        * this:
+                        *
+                        * (1) increase loop count to "iteration-1"
+                        * (2) write to page P happens (with value
+                        *     "iteration-1")
+                        * (3) get dirty log for "iteration-1"; we'll
+                        *     see that page P bit is set (dirtied),
+                        *     and not set the bit in host_bmap_track
+                        * (4) increase loop count to "iteration"
+                        *     (which is current iteration)
+                        * (5) get dirty log for current iteration,
+                        *     we'll see that page P is cleared, with
+                        *     value "iteration-1".
+                        */
+                       TEST_ASSERT(*value_ptr <= iteration,
+                                   "Clear page %"PRIu64" value %"PRIu64
+                                   " incorrect (iteration=%"PRIu64")",
+                                   page, *value_ptr, iteration);
+                       if (*value_ptr == iteration) {
+                               /*
+                                * This page was _just_ modified; it
+                                * should report its dirtiness in the
+                                * next run
+                                */
+                               set_bit(page, host_bmap_track);
+                       }
+               }
+       }
+}
+
+void help(char *name)
+{
+       puts("");
+       printf("usage: %s [-i iterations] [-I interval] [-h]\n", name);
+       puts("");
+       printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+              TEST_HOST_LOOP_N);
+       printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
+              TEST_HOST_LOOP_INTERVAL);
+       puts("");
+       exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+       pthread_t vcpu_thread;
+       struct kvm_vm *vm;
+       uint64_t volatile *psize, *iteration;
+       unsigned long *bmap, iterations = TEST_HOST_LOOP_N,
+           interval = TEST_HOST_LOOP_INTERVAL;
+       int opt;
+
+       while ((opt = getopt(argc, argv, "hi:I:")) != -1) {
+               switch (opt) {
+               case 'i':
+                       iterations = strtol(optarg, NULL, 10);
+                       break;
+               case 'I':
+                       interval = strtol(optarg, NULL, 10);
+                       break;
+               case 'h':
+               default:
+                       help(argv[0]);
+                       break;
+               }
+       }
+
+       TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
+       TEST_ASSERT(interval > 0, "Interval must be bigger than zero");
+
+       DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
+             iterations, interval);
+
+       srandom(time(0));
+
+       bmap = bitmap_alloc(TEST_MEM_PAGES);
+       host_bmap_track = bitmap_alloc(TEST_MEM_PAGES);
+
+       vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code);
+
+       /* Add an extra memory slot for testing dirty logging */
+       vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+                                   TEST_MEM_OFFSET,
+                                   TEST_MEM_SLOT_INDEX,
+                                   TEST_MEM_PAGES,
+                                   KVM_MEM_LOG_DIRTY_PAGES);
+       /* Cache the HVA pointer of the region */
+       host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET);
+
+       /* Do a 1:1 mapping for the dirty-tracking memory slot */
+       virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET,
+                TEST_MEM_PAGES * getpagesize(), 0);
+
+       vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+       /* Tell the guest about the page size on the system */
+       psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size);
+       *psize = getpagesize();
+
+       /* Start the iterations */
+       iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration);
+       *iteration = 1;
+
+       /* Start dirtying pages */
+       pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
+
+       while (*iteration < iterations) {
+               /* Give the vcpu thread some time to dirty some pages */
+               usleep(interval * 1000);
+               kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+               vm_dirty_log_verify(bmap, *iteration);
+               (*iteration)++;
+       }
+
+       /* Tell the vcpu thread to quit */
+       host_quit = true;
+       pthread_join(vcpu_thread, NULL);
+
+       DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
+             "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
+             host_track_next_count);
+
+       free(bmap);
+       free(host_bmap_track);
+       kvm_vm_free(vm);
+
+       return 0;
+}
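Stated compactly, the per-page invariant that vm_dirty_log_verify() enforces can be written as the following predicate; this is an illustrative restatement of the test logic above, not part of the patch ("stamp" is the iteration number the guest last wrote into the page):

static bool page_state_ok(bool reported_dirty, uint64_t stamp,
			  uint64_t iteration)
{
	if (reported_dirty)
		/* A dirty page must carry the current or the previous stamp */
		return stamp == iteration || stamp == iteration - 1;
	/*
	 * A clean page may carry any older stamp.  A stamp equal to the
	 * current iteration means the write raced with the log fetch and
	 * must show up as dirty in the next iteration (host_bmap_track).
	 */
	return stamp <= iteration;
}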
index d32632f71ab8d3021e6a8a6db2270a286d3a4d67..bb5a25fb82c60cbe8f7a108c81bc1c78aa2a7a20 100644 (file)
@@ -55,6 +55,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
 void kvm_vm_free(struct kvm_vm *vmp);
 void kvm_vm_restart(struct kvm_vm *vmp, int perm);
 void kvm_vm_release(struct kvm_vm *vmp);
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
 
 int kvm_memcmp_hva_gva(void *hva,
        struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
@@ -80,6 +81,8 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
        uint32_t data_memslot, uint32_t pgd_memslot);
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+             size_t size, uint32_t pgd_memslot);
 void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
 void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
 vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
@@ -127,7 +130,8 @@ kvm_get_supported_cpuid_entry(uint32_t function)
        return kvm_get_supported_cpuid_index(function, 0);
 }
 
-struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code);
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
+                                void *guest_code);
 void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
 
 typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
@@ -144,4 +148,43 @@ allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
 
 int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
 
+#define GUEST_PORT_SYNC         0x1000
+#define GUEST_PORT_ABORT        0x1001
+#define GUEST_PORT_DONE         0x1002
+
+static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
+{
+       __asm__ __volatile__("in %[port], %%al"
+                            :
+                            : [port]"d"(port), "D"(arg0), "S"(arg1)
+                            : "rax");
+}
+
+/*
+ * Allows passing three arguments to the host: port is 16 bits wide,
+ * arg0 and arg1 are 64 bits wide.
+ */
+#define GUEST_SYNC_ARGS(_port, _arg0, _arg1) \
+       __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
+
+#define GUEST_ASSERT(_condition) do {                          \
+               if (!(_condition))                              \
+                       GUEST_SYNC_ARGS(GUEST_PORT_ABORT,       \
+                                       "Failed guest assert: " \
+                                       #_condition, __LINE__); \
+       } while (0)
+
+#define GUEST_SYNC(stage)  GUEST_SYNC_ARGS(GUEST_PORT_SYNC, "hello", stage)
+
+#define GUEST_DONE()  GUEST_SYNC_ARGS(GUEST_PORT_DONE, 0, 0)
+
+struct guest_args {
+       uint64_t arg0;
+       uint64_t arg1;
+       uint16_t port;
+} __attribute__ ((packed));
+
+void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
+                    struct guest_args *args);
+
 #endif /* SELFTEST_KVM_UTIL_H */
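The GUEST_PORT_* macros above make the guest execute an "in" instruction, which traps to userspace as a KVM_EXIT_IO with arg0/arg1 left in RDI/RSI for guest_args_read() to pick up. A minimal sketch of the host-side dispatch loop a test built on these macros would use (VCPU_ID and the surrounding harness are assumed; the string in arg0 is readable on the host because the guest runs the test binary's own ELF image):

static void handle_guest_exits(struct kvm_vm *vm)
{
	struct guest_args args;

	for (;;) {
		vcpu_run(vm, VCPU_ID);
		guest_args_read(vm, VCPU_ID, &args);

		switch (args.port) {
		case GUEST_PORT_SYNC:
			/* arg0 carries the "hello" marker, arg1 the stage */
			break;
		case GUEST_PORT_ABORT:
			TEST_ASSERT(false, "%s at line %lu",
				    (const char *)args.arg0,
				    (unsigned long)args.arg1);
			/* NOT REACHED */
		case GUEST_PORT_DONE:
			return;
		default:
			TEST_ASSERT(false, "Unknown port 0x%x.", args.port);
		}
	}
}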
index ac53730b30aa48c9f9ecf54e3e6362f8cdf0e1c4..73c3933436ecc1206b8f90f2e4991ac4ddcf7bea 100644 (file)
@@ -28,8 +28,6 @@ int test_seq_read(const char *path, char **bufp, size_t *sizep);
 void test_assert(bool exp, const char *exp_str,
                 const char *file, unsigned int line, const char *fmt, ...);
 
-#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0]))
-
 #define TEST_ASSERT(e, fmt, ...) \
        test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
 
index 643309d6de743d0962b55bc4ce6f3c4c82df5c75..e9ba389c48dbc95390d965a8ea7d7f03da9ece44 100644 (file)
@@ -14,6 +14,7 @@
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <linux/kernel.h>
 
 #define KVM_DEV_PATH "/dev/kvm"
 
@@ -168,6 +169,16 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
        }
 }
 
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+       struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
+       int ret;
+
+       ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
+       TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed, rc: %i errno: %i",
+                   __func__, ret, errno);
+}
+
 /* Userspace Memory Region Find
  *
  * Input Args:
@@ -923,6 +934,39 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
        return vaddr_start;
 }
 
+/*
+ * Map a range of VM virtual address to the VM's physical address
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vaddr - Virtual address to map
+ *   paddr - VM Physical Address
+ *   size - The size of the range to map
+ *   pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the
+ * page range starting at vaddr to the page range starting at paddr.
+ */
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+             size_t size, uint32_t pgd_memslot)
+{
+       size_t page_size = vm->page_size;
+       size_t npages = size / page_size;
+
+       TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
+       TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+       while (npages--) {
+               virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+               vaddr += page_size;
+               paddr += page_size;
+       }
+}
+
 /* Address VM Physical to Host Virtual
  *
  * Input Args:
@@ -1536,3 +1580,17 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
 {
        return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
 }
+
+void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
+                    struct guest_args *args)
+{
+       struct kvm_run *run = vcpu_state(vm, vcpu_id);
+       struct kvm_regs regs;
+
+       memset(&regs, 0, sizeof(regs));
+       vcpu_regs_get(vm, vcpu_id, &regs);
+
+       args->port = run->io.port;
+       args->arg0 = regs.rdi;
+       args->arg1 = regs.rsi;
+}
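For reference, kvm_vm_get_dirty_log() added above is a thin wrapper around the KVM_GET_DIRTY_LOG vm ioctl. A hedged standalone sketch of the same call outside the selftest library (vm_fd is assumed to be an open VM file descriptor; the bitmap must provide one bit per page of the memslot):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int get_dirty_log_raw(int vm_fd, int slot, void *bitmap)
{
	struct kvm_dirty_log log = {
		.slot = slot,
		.dirty_bitmap = bitmap,
	};

	/* Returns 0 on success; the kernel clears the dirty bits it reports */
	return ioctl(vm_fd, KVM_GET_DIRTY_LOG, &log);
}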
index e38345252df5e8f0b9ebeddd29751f996c589a5e..a3122f1949a8e67df2d6cb206ecf353d30f6d015 100644 (file)
@@ -702,6 +702,9 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
  *
  * Input Args:
  *   vcpuid - The id of the single VCPU to add to the VM.
+ *   extra_mem_pages - The number of extra pages of guest memory to
+ *                     account for (this decides how much extra space
+ *                     is needed in memory slot 0 for the page tables)
  *   guest_code - The vCPU's entry point
  *
  * Output Args: None
@@ -709,12 +712,23 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
  * Return:
  *   Pointer to opaque structure that describes the created VM.
  */
-struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code)
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+                                void *guest_code)
 {
        struct kvm_vm *vm;
+       /*
+        * For x86 the maximum page table size for a memory region
+        * will be when only 4K pages are used.  In that case the
+        * total extra size for page tables (for extra N pages) will
+        * be: N/512+N/512^2+N/512^3+... which is definitely smaller
+        * than N/512*2.
+        */
+       uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
 
        /* Create VM */
-       vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+       vm = vm_create(VM_MODE_FLAT48PG,
+                      DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
+                      O_RDWR);
 
        /* Setup guest code */
        kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
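To make the bound in the comment above concrete (an illustration, not part of the patch): with 4K pages and 512 entries per paging level, mapping N extra pages takes roughly N/512 page-table pages at the PTE level, N/512^2 at the PMD level, N/512^3 at the PUD level, and so on; the series sums to N/511, safely under the N/512 * 2 that extra_pg_pages reserves. For the dirty logging test's N = 2^18 pages (1 GiB), that is about 512 + 1 + 1 = 514 pages against a reservation of 1024.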
index 090fd3f1935276146225e2e67db2fcd01c57b6c0..881419d5746e7e8a2e0bd8bb318867d50d75d731 100644 (file)
@@ -36,7 +36,7 @@ int main(int argc, char *argv[])
        setbuf(stdout, NULL);
 
        /* Create VM */
-       vm = vm_create_default(VCPU_ID, NULL);
+       vm = vm_create_default(VCPU_ID, 0, NULL);
 
        vcpu_sregs_get(vm, VCPU_ID, &sregs);
        sregs.apic_base = 1 << 10;
index ecabf25b70777d5d8eaa5df915e3c3d750cad65e..900e3e9dfb9f6516e4a31310de08c51dbf192d51 100644 (file)
 #include "vmx.h"
 
 #define VCPU_ID                5
-#define PORT_SYNC      0x1000
-#define PORT_ABORT     0x1001
-#define PORT_DONE      0x1002
-
-static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
-{
-       __asm__ __volatile__("in %[port], %%al"
-                            :
-                            : [port]"d"(port), "D"(arg0), "S"(arg1)
-                            : "rax");
-}
-
-#define exit_to_l0(_port, _arg0, _arg1) \
-       __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
-
-#define GUEST_ASSERT(_condition) do { \
-       if (!(_condition)) \
-               exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, __LINE__);\
-} while (0)
-
-#define GUEST_SYNC(stage) \
-       exit_to_l0(PORT_SYNC, "hello", stage);
 
 static bool have_nested_state;
 
@@ -137,7 +115,7 @@ void guest_code(struct vmx_pages *vmx_pages)
        if (vmx_pages)
                l1_guest_code(vmx_pages);
 
-       exit_to_l0(PORT_DONE, 0, 0);
+       GUEST_DONE();
 }
 
 int main(int argc, char *argv[])
@@ -154,7 +132,7 @@ int main(int argc, char *argv[])
        struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
 
        /* Create VM */
-       vm = vm_create_default(VCPU_ID, guest_code);
+       vm = vm_create_default(VCPU_ID, 0, guest_code);
        vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
        run = vcpu_state(vm, VCPU_ID);
 
@@ -178,13 +156,13 @@ int main(int argc, char *argv[])
                memset(&regs1, 0, sizeof(regs1));
                vcpu_regs_get(vm, VCPU_ID, &regs1);
                switch (run->io.port) {
-               case PORT_ABORT:
+               case GUEST_PORT_ABORT:
                        TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi,
                                    __FILE__, regs1.rsi);
                        /* NOT REACHED */
-               case PORT_SYNC:
+               case GUEST_PORT_SYNC:
                        break;
-               case PORT_DONE:
+               case GUEST_PORT_DONE:
                        goto done;
                default:
                        TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
index eae1ece3c31b8505e99877bf11d8fa8fb6907cdc..213343e5dff96f91306d1de162a35fd459896396 100644 (file)
 #include "x86.h"
 
 #define VCPU_ID 5
-#define PORT_HOST_SYNC 0x1000
-
-static void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
-{
-               __asm__ __volatile__("in %[port], %%al"
-                                    :
-                                    : [port]"d"(port), "D"(arg0), "S"(arg1)
-                                    : "rax");
-}
-
-#define exit_to_l0(_port, _arg0, _arg1) \
-        __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
-
-#define GUEST_ASSERT(_condition) do { \
-       if (!(_condition)) \
-               exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, 0);\
-} while (0)
 
 void guest_code(void)
 {
        for (;;) {
-               exit_to_l0(PORT_HOST_SYNC, "hello", 0);
+               GUEST_SYNC(0);
                asm volatile ("inc %r11");
        }
 }
@@ -111,7 +94,7 @@ int main(int argc, char *argv[])
        }
 
        /* Create VM */
-       vm = vm_create_default(VCPU_ID, guest_code);
+       vm = vm_create_default(VCPU_ID, 0, guest_code);
 
        run = vcpu_state(vm, VCPU_ID);
 
index fc414c28436860a3124f425cfbb57b4bd7240b03..49bcc68b0235040b43091e274f01853a876dc2ad 100644 (file)
@@ -62,27 +62,12 @@ struct kvm_single_msr {
 /* The virtual machine object. */
 static struct kvm_vm *vm;
 
-#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg))
-static void do_exit_to_l0(uint16_t port, unsigned long arg)
-{
-       __asm__ __volatile__("in %[port], %%al"
-               :
-               : [port]"d"(port), "D"(arg)
-               : "rax");
-}
-
-
-#define GUEST_ASSERT(_condition) do {                                       \
-       if (!(_condition))                                                   \
-               exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \
-} while (0)
-
 static void check_ia32_tsc_adjust(int64_t max)
 {
        int64_t adjust;
 
        adjust = rdmsr(MSR_IA32_TSC_ADJUST);
-       exit_to_l0(PORT_REPORT, adjust);
+       GUEST_SYNC(adjust);
        GUEST_ASSERT(adjust <= max);
 }
 
@@ -132,7 +117,7 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
 
        check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
 
-       exit_to_l0(PORT_DONE, 0);
+       GUEST_DONE();
 }
 
 void report(int64_t val)
@@ -152,7 +137,7 @@ int main(int argc, char *argv[])
                exit(KSFT_SKIP);
        }
 
-       vm = vm_create_default(VCPU_ID, (void *) l1_guest_code);
+       vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
        vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
 
        /* Allocate VMX pages and shared descriptors (vmx_pages). */
@@ -161,26 +146,26 @@ int main(int argc, char *argv[])
 
        for (;;) {
                volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
-               struct kvm_regs regs;
+               struct guest_args args;
 
                vcpu_run(vm, VCPU_ID);
-               vcpu_regs_get(vm, VCPU_ID, &regs);
+               guest_args_read(vm, VCPU_ID, &args);
                TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-                           "Got exit_reason other than KVM_EXIT_IO: %u (%s), rip=%lx\n",
+                           "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
                            run->exit_reason,
-                           exit_reason_str(run->exit_reason), regs.rip);
+                           exit_reason_str(run->exit_reason));
 
-               switch (run->io.port) {
-               case PORT_ABORT:
-                       TEST_ASSERT(false, "%s", (const char *) regs.rdi);
+               switch (args.port) {
+               case GUEST_PORT_ABORT:
+                       TEST_ASSERT(false, "%s", (const char *) args.arg0);
                        /* NOT REACHED */
-               case PORT_REPORT:
-                       report(regs.rdi);
+               case GUEST_PORT_SYNC:
+                       report(args.arg1);
                        break;
-               case PORT_DONE:
+               case GUEST_PORT_DONE:
                        goto done;
                default:
-                       TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+                       TEST_ASSERT(false, "Unknown port 0x%x.", args.port);
                }
        }
 
index bd3d57f40f1be50205b0a295256af0910c704654..17cecc96f735e3785bb1f6a30e4e20ee613b4bcd 100644 (file)
@@ -295,9 +295,9 @@ static void phys_timer_emulate(struct kvm_vcpu *vcpu)
        struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
        /*
-        * If the timer can fire now we have just raised the IRQ line and we
-        * don't need to have a soft timer scheduled for the future.  If the
-        * timer cannot fire at all, then we also don't need a soft timer.
+        * If the timer can fire now, we don't need to have a soft timer
+        * scheduled for the future.  If the timer cannot fire at all,
+        * then we also don't need a soft timer.
         */
        if (kvm_timer_should_fire(ptimer) || !kvm_timer_irq_can_fire(ptimer)) {
                soft_timer_cancel(&timer->phys_timer, NULL);
@@ -332,10 +332,10 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
        level = kvm_timer_should_fire(vtimer);
        kvm_timer_update_irq(vcpu, level, vtimer);
 
+       phys_timer_emulate(vcpu);
+
        if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
                kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
-
-       phys_timer_emulate(vcpu);
 }
 
 static void vtimer_save_state(struct kvm_vcpu *vcpu)
@@ -487,6 +487,7 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 {
        struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
        struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+       struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
        if (unlikely(!timer->enabled))
                return;
@@ -502,6 +503,10 @@ void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
 
        /* Set the background timer for the physical timer emulation. */
        phys_timer_emulate(vcpu);
+
+       /* If the timer fired while we weren't running, inject it now */
+       if (kvm_timer_should_fire(ptimer) != ptimer->irq.level)
+               kvm_timer_update_irq(vcpu, !ptimer->irq.level, ptimer);
 }
 
 bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
index 108250e4d37640846c36c991420eafe3d4cbb18a..c92053bc3f96f875db4d2443ce79ab065fa2a00b 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/kvm.h>
 #include <linux/kvm_irqfd.h>
 #include <linux/irqbypass.h>
+#include <linux/sched/stat.h>
 #include <trace/events/kvm.h>
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_psci.h>
@@ -380,6 +381,11 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        kvm_timer_vcpu_load(vcpu);
        kvm_vcpu_load_sysregs(vcpu);
        kvm_arch_vcpu_load_fp(vcpu);
+
+       if (single_task_running())
+               vcpu_clear_wfe_traps(vcpu);
+       else
+               vcpu_set_wfe_traps(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -1044,6 +1050,32 @@ static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu,
        return ret;
 }
 
+static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
+                                  struct kvm_vcpu_events *events)
+{
+       memset(events, 0, sizeof(*events));
+
+       return __kvm_arm_vcpu_get_events(vcpu, events);
+}
+
+static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
+                                  struct kvm_vcpu_events *events)
+{
+       int i;
+
+       /* check whether the reserved fields are zero */
+       for (i = 0; i < ARRAY_SIZE(events->reserved); i++)
+               if (events->reserved[i])
+                       return -EINVAL;
+
+       /* check whether the pad fields are zero */
+       for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++)
+               if (events->exception.pad[i])
+                       return -EINVAL;
+
+       return __kvm_arm_vcpu_set_events(vcpu, events);
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
 {
@@ -1124,6 +1156,25 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = kvm_arm_vcpu_has_attr(vcpu, &attr);
                break;
        }
+       case KVM_GET_VCPU_EVENTS: {
+               struct kvm_vcpu_events events;
+
+               if (kvm_arm_vcpu_get_events(vcpu, &events))
+                       return -EINVAL;
+
+               if (copy_to_user(argp, &events, sizeof(events)))
+                       return -EFAULT;
+
+               return 0;
+       }
+       case KVM_SET_VCPU_EVENTS: {
+               struct kvm_vcpu_events events;
+
+               if (copy_from_user(&events, argp, sizeof(events)))
+                       return -EFAULT;
+
+               return kvm_arm_vcpu_set_events(vcpu, &events);
+       }
        default:
                r = -EINVAL;
        }
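As a hedged userspace-side illustration of the new ioctls (vcpu_fd is assumed to be an open vCPU file descriptor; only behaviour visible in the hunk above is relied on, namely that non-zero reserved and exception.pad fields are rejected):

#include <sys/ioctl.h>
#include <linux/kvm.h>

static int roundtrip_vcpu_events(int vcpu_fd)
{
	struct kvm_vcpu_events events;

	/* GET fills the structure, including zeroed reserved/pad fields */
	if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events))
		return -1;

	/* SET accepts the structure back as long as reserved/pad stay zero */
	return ioctl(vcpu_fd, KVM_SET_VCPU_EVENTS, &events);
}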
index 1d90d79706bd5b71d3914ecd808d2bd6c127286c..91aaf73b00df8a385a28c2a221505b6ba82062b1 100644 (file)
@@ -177,6 +177,35 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr
        put_page(virt_to_page(pmd));
 }
 
+static inline void kvm_set_pte(pte_t *ptep, pte_t new_pte)
+{
+       WRITE_ONCE(*ptep, new_pte);
+       dsb(ishst);
+}
+
+static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd)
+{
+       WRITE_ONCE(*pmdp, new_pmd);
+       dsb(ishst);
+}
+
+static inline void kvm_pmd_populate(pmd_t *pmdp, pte_t *ptep)
+{
+       kvm_set_pmd(pmdp, kvm_mk_pmd(ptep));
+}
+
+static inline void kvm_pud_populate(pud_t *pudp, pmd_t *pmdp)
+{
+       WRITE_ONCE(*pudp, kvm_mk_pud(pmdp));
+       dsb(ishst);
+}
+
+static inline void kvm_pgd_populate(pgd_t *pgdp, pud_t *pudp)
+{
+       WRITE_ONCE(*pgdp, kvm_mk_pgd(pudp));
+       dsb(ishst);
+}
+
 /*
  * Unmapping vs dcache management:
  *
@@ -196,6 +225,10 @@ static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr
  * This is why right after unmapping a page/section and invalidating
  * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure
  * the IO subsystem will never hit in the cache.
+ *
+ * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as
+ * we then fully enforce cacheability of RAM, no matter what the guest
+ * does.
  */
 static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd,
                       phys_addr_t addr, phys_addr_t end)
@@ -576,7 +609,6 @@ static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start,
                pte = pte_offset_kernel(pmd, addr);
                kvm_set_pte(pte, pfn_pte(pfn, prot));
                get_page(virt_to_page(pte));
-               kvm_flush_dcache_to_poc(pte, sizeof(*pte));
                pfn++;
        } while (addr += PAGE_SIZE, addr != end);
 }
@@ -601,9 +633,8 @@ static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start,
                                kvm_err("Cannot allocate Hyp pte\n");
                                return -ENOMEM;
                        }
-                       pmd_populate_kernel(NULL, pmd, pte);
+                       kvm_pmd_populate(pmd, pte);
                        get_page(virt_to_page(pmd));
-                       kvm_flush_dcache_to_poc(pmd, sizeof(*pmd));
                }
 
                next = pmd_addr_end(addr, end);
@@ -634,9 +665,8 @@ static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start,
                                kvm_err("Cannot allocate Hyp pmd\n");
                                return -ENOMEM;
                        }
-                       pud_populate(NULL, pud, pmd);
+                       kvm_pud_populate(pud, pmd);
                        get_page(virt_to_page(pud));
-                       kvm_flush_dcache_to_poc(pud, sizeof(*pud));
                }
 
                next = pud_addr_end(addr, end);
@@ -671,9 +701,8 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
                                err = -ENOMEM;
                                goto out;
                        }
-                       pgd_populate(NULL, pgd, pud);
+                       kvm_pgd_populate(pgd, pud);
                        get_page(virt_to_page(pgd));
-                       kvm_flush_dcache_to_poc(pgd, sizeof(*pgd));
                }
 
                next = pgd_addr_end(addr, end);
@@ -1015,19 +1044,35 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache
        pmd = stage2_get_pmd(kvm, cache, addr);
        VM_BUG_ON(!pmd);
 
-       /*
-        * Mapping in huge pages should only happen through a fault.  If a
-        * page is merged into a transparent huge page, the individual
-        * subpages of that huge page should be unmapped through MMU
-        * notifiers before we get here.
-        *
-        * Merging of CompoundPages is not supported; they should become
-        * splitting first, unmapped, merged, and mapped back in on-demand.
-        */
-       VM_BUG_ON(pmd_present(*pmd) && pmd_pfn(*pmd) != pmd_pfn(*new_pmd));
-
        old_pmd = *pmd;
        if (pmd_present(old_pmd)) {
+               /*
+                * Multiple vcpus faulting on the same PMD entry can
+                * lead to them sequentially updating the PMD with the
+                * same value. Following the break-before-make
+                * (pmd_clear() followed by tlb_flush()) process can
+                * hinder forward progress due to refaults generated
+                * on missing translations.
+                *
+                * Skip updating the page table if the entry is
+                * unchanged.
+                */
+               if (pmd_val(old_pmd) == pmd_val(*new_pmd))
+                       return 0;
+
+               /*
+                * Mapping in huge pages should only happen through a
+                * fault.  If a page is merged into a transparent huge
+                * page, the individual subpages of that huge page
+                * should be unmapped through MMU notifiers before we
+                * get here.
+                *
+                * Merging of CompoundPages is not supported; they
+                * should first be split, unmapped, merged, and
+                * mapped back in on demand.
+                */
+               VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd));
+
                pmd_clear(pmd);
                kvm_tlb_flush_vmid_ipa(kvm, addr);
        } else {
@@ -1090,7 +1135,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
                if (!cache)
                        return 0; /* ignore calls from kvm_set_spte_hva */
                pte = mmu_memory_cache_alloc(cache);
-               pmd_populate_kernel(NULL, pmd, pte);
+               kvm_pmd_populate(pmd, pte);
                get_page(virt_to_page(pmd));
        }
 
@@ -1102,6 +1147,10 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache,
        /* Create 2nd stage page table mapping - Level 3 */
        old_pte = *pte;
        if (pte_present(old_pte)) {
+               /* Skip page table update if there is no change */
+               if (pte_val(old_pte) == pte_val(*new_pte))
+                       return 0;
+
                kvm_set_pte(pte, __pte(0));
                kvm_tlb_flush_vmid_ipa(kvm, addr);
        } else {
index c589d4c2b478b2df7ada6cee1837340c762a41b5..07aa900bac56a3aa5d85dadc462225905c2c4f0f 100644 (file)
 struct vgic_state_iter {
        int nr_cpus;
        int nr_spis;
+       int nr_lpis;
        int dist_id;
        int vcpu_id;
        int intid;
+       int lpi_idx;
+       u32 *lpi_array;
 };
 
 static void iter_next(struct vgic_state_iter *iter)
@@ -52,6 +55,12 @@ static void iter_next(struct vgic_state_iter *iter)
        if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
            ++iter->vcpu_id < iter->nr_cpus)
                iter->intid = 0;
+
+       if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
+               if (iter->lpi_idx < iter->nr_lpis)
+                       iter->intid = iter->lpi_array[iter->lpi_idx];
+               iter->lpi_idx++;
+       }
 }
 
 static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
@@ -63,6 +72,11 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
 
        iter->nr_cpus = nr_cpus;
        iter->nr_spis = kvm->arch.vgic.nr_spis;
+       if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
+               iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array);
+               if (iter->nr_lpis < 0)
+                       iter->nr_lpis = 0;
+       }
 
        /* Fast forward to the right position if needed */
        while (pos--)
@@ -73,7 +87,8 @@ static bool end_of_vgic(struct vgic_state_iter *iter)
 {
        return iter->dist_id > 0 &&
                iter->vcpu_id == iter->nr_cpus &&
-               (iter->intid - VGIC_NR_PRIVATE_IRQS) == iter->nr_spis;
+               iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) &&
+               iter->lpi_idx > iter->nr_lpis;
 }
 
 static void *vgic_debug_start(struct seq_file *s, loff_t *pos)
@@ -130,6 +145,7 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
 
        mutex_lock(&kvm->lock);
        iter = kvm->arch.vgic.iter;
+       kfree(iter->lpi_array);
        kfree(iter);
        kvm->arch.vgic.iter = NULL;
        mutex_unlock(&kvm->lock);
@@ -137,17 +153,20 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
 
 static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
 {
+       bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3;
+
        seq_printf(s, "Distributor\n");
        seq_printf(s, "===========\n");
-       seq_printf(s, "vgic_model:\t%s\n",
-                  (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) ?
-                  "GICv3" : "GICv2");
+       seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
        seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
+       if (v3)
+               seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count);
        seq_printf(s, "enabled:\t%d\n", dist->enabled);
        seq_printf(s, "\n");
 
        seq_printf(s, "P=pending_latch, L=line_level, A=active\n");
        seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n");
+       seq_printf(s, "G=group\n");
 }
 
 static void print_header(struct seq_file *s, struct vgic_irq *irq,
@@ -162,8 +181,8 @@ static void print_header(struct seq_file *s, struct vgic_irq *irq,
        }
 
        seq_printf(s, "\n");
-       seq_printf(s, "%s%2d TYP   ID TGT_ID PLAEHC     HWID   TARGET SRC PRI VCPU_ID\n", hdr, id);
-       seq_printf(s, "---------------------------------------------------------------\n");
+       seq_printf(s, "%s%2d TYP   ID TGT_ID PLAEHCG     HWID   TARGET SRC PRI VCPU_ID\n", hdr, id);
+       seq_printf(s, "----------------------------------------------------------------\n");
 }
 
 static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
@@ -174,15 +193,17 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
                type = "SGI";
        else if (irq->intid < VGIC_NR_PRIVATE_IRQS)
                type = "PPI";
-       else
+       else if (irq->intid < VGIC_MAX_SPI)
                type = "SPI";
+       else
+               type = "LPI";
 
        if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS)
                print_header(s, irq, vcpu);
 
        seq_printf(s, "       %s %4d "
                      "    %2d "
-                     "%d%d%d%d%d%d "
+                     "%d%d%d%d%d%d%d "
                      "%8d "
                      "%8x "
                      " %2x "
@@ -197,12 +218,12 @@ static void print_irq_state(struct seq_file *s, struct vgic_irq *irq,
                        irq->enabled,
                        irq->hw,
                        irq->config == VGIC_CONFIG_LEVEL,
+                       irq->group,
                        irq->hwintid,
                        irq->mpidr,
                        irq->source,
                        irq->priority,
                        (irq->vcpu) ? irq->vcpu->vcpu_id : -1);
-
 }
 
 static int vgic_debug_show(struct seq_file *s, void *v)
@@ -221,17 +242,20 @@ static int vgic_debug_show(struct seq_file *s, void *v)
        if (!kvm->arch.vgic.initialized)
                return 0;
 
-       if (iter->vcpu_id < iter->nr_cpus) {
+       if (iter->vcpu_id < iter->nr_cpus)
                vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
-               irq = &vcpu->arch.vgic_cpu.private_irqs[iter->intid];
-       } else {
-               irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS];
+
+       irq = vgic_get_irq(kvm, vcpu, iter->intid);
+       if (!irq) {
+               seq_printf(s, "       LPI %4d freed\n", iter->intid);
+               return 0;
        }
 
        spin_lock_irqsave(&irq->irq_lock, flags);
        print_irq_state(s, irq, vcpu);
        spin_unlock_irqrestore(&irq->irq_lock, flags);
 
+       vgic_put_irq(kvm, irq);
        return 0;
 }
 
index 2673efce65f34ac95511f555377981e78119f248..c0c0b88af1d58c0e1c3015424be66272f4873845 100644 (file)
@@ -175,10 +175,13 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
                irq->vcpu = NULL;
                irq->target_vcpu = vcpu0;
                kref_init(&irq->refcount);
-               if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+               if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) {
                        irq->targets = 0;
-               else
+                       irq->group = 0;
+               } else {
                        irq->mpidr = 0;
+                       irq->group = 1;
+               }
        }
        return 0;
 }
@@ -227,6 +230,18 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
                        /* PPIs */
                        irq->config = VGIC_CONFIG_LEVEL;
                }
+
+               /*
+                * GICv3 can only be created via the KVM_CREATE_DEVICE API and
+                * so we always know the emulation type at this point as it's
+                * either explicitly configured as GICv3, or explicitly
+                * configured as GICv2, or not configured yet which also
+                * implies GICv2.
+                */
+               if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+                       irq->group = 1;
+               else
+                       irq->group = 0;
        }
 
        if (!irqchip_in_kernel(vcpu->kvm))
@@ -271,6 +286,10 @@ int vgic_init(struct kvm *kvm)
        if (vgic_initialized(kvm))
                return 0;
 
+       /* Are we also in the middle of creating a VCPU? */
+       if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
+               return -EBUSY;
+
        /* freeze the number of spis */
        if (!dist->nr_spis)
                dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
@@ -294,6 +313,7 @@ int vgic_init(struct kvm *kvm)
 
        vgic_debug_init(kvm);
 
+       dist->implementation_rev = 2;
        dist->initialized = true;
 
 out:
index 4ed79c939fb447188d0a2bc7c740e85e1d82135c..12502251727eb9aaf35bd1caa63e32a193082042 100644 (file)
@@ -71,6 +71,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
        kref_init(&irq->refcount);
        irq->intid = intid;
        irq->target_vcpu = vcpu;
+       irq->group = 1;
 
        spin_lock_irqsave(&dist->lpi_list_lock, flags);
 
@@ -168,8 +169,14 @@ struct vgic_its_abi {
        int (*commit)(struct vgic_its *its);
 };
 
+#define ABI_0_ESZ      8
+#define ESZ_MAX                ABI_0_ESZ
+
 static const struct vgic_its_abi its_table_abi_versions[] = {
-       [0] = {.cte_esz = 8, .dte_esz = 8, .ite_esz = 8,
+       [0] = {
+        .cte_esz = ABI_0_ESZ,
+        .dte_esz = ABI_0_ESZ,
+        .ite_esz = ABI_0_ESZ,
         .save_tables = vgic_its_save_tables_v0,
         .restore_tables = vgic_its_restore_tables_v0,
         .commit = vgic_its_commit_v0,
@@ -183,7 +190,7 @@ inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its)
        return &its_table_abi_versions[its->abi_rev];
 }
 
-int vgic_its_set_abi(struct vgic_its *its, int rev)
+static int vgic_its_set_abi(struct vgic_its *its, u32 rev)
 {
        const struct vgic_its_abi *abi;
 
@@ -312,9 +319,9 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
  * enumerate those LPIs without holding any lock.
  * Returns their number and puts the kmalloc'ed array into intid_ptr.
  */
-static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
+int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
 {
-       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       struct vgic_dist *dist = &kvm->arch.vgic;
        struct vgic_irq *irq;
        unsigned long flags;
        u32 *intids;
@@ -337,7 +344,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
                if (i == irq_count)
                        break;
                /* We don't need to "get" the IRQ, as we hold the list lock. */
-               if (irq->target_vcpu != vcpu)
+               if (vcpu && irq->target_vcpu != vcpu)
                        continue;
                intids[i++] = irq->intid;
        }
@@ -429,7 +436,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
        unsigned long flags;
        u8 pendmask;
 
-       nr_irqs = vgic_copy_lpi_list(vcpu, &intids);
+       nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids);
        if (nr_irqs < 0)
                return nr_irqs;
 
@@ -1154,7 +1161,7 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
 
        vcpu = kvm_get_vcpu(kvm, collection->target_addr);
 
-       irq_count = vgic_copy_lpi_list(vcpu, &intids);
+       irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
        if (irq_count < 0)
                return irq_count;
 
@@ -1202,7 +1209,7 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
        vcpu1 = kvm_get_vcpu(kvm, target1_addr);
        vcpu2 = kvm_get_vcpu(kvm, target2_addr);
 
-       irq_count = vgic_copy_lpi_list(vcpu1, &intids);
+       irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
        if (irq_count < 0)
                return irq_count;
 
@@ -1881,14 +1888,14 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry,
  * Return: < 0 on error, 0 if last element was identified, 1 otherwise
  * (the last element may not be found on second level tables)
  */
-static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz,
+static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
                          int start_id, entry_fn_t fn, void *opaque)
 {
        struct kvm *kvm = its->dev->kvm;
        unsigned long len = size;
        int id = start_id;
        gpa_t gpa = base;
-       char entry[esz];
+       char entry[ESZ_MAX];
        int ret;
 
        memset(entry, 0, esz);
index ffc587bf4742676d14930d21f07005275576a79e..738b65d2d0e76587d24fa0a44f7107c05400cc49 100644 (file)
 #include "vgic.h"
 #include "vgic-mmio.h"
 
+/*
+ * The Revision field in the IIDR has the following meanings:
+ *
+ * Revision 1: Report GICv2 interrupts as group 0 instead of group 1
+ * Revision 2: Interrupt groups are guest-configurable and signaled using
+ *            their configured groups.
+ */
+
 static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
                                            gpa_t addr, unsigned int len)
 {
+       struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
        u32 value;
 
        switch (addr & 0x0c) {
        case GIC_DIST_CTRL:
-               value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0;
+               value = vgic->enabled ? GICD_ENABLE : 0;
                break;
        case GIC_DIST_CTR:
-               value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+               value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
                value = (value >> 5) - 1;
                value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
                break;
        case GIC_DIST_IIDR:
-               value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+               value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+                       (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
+                       (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
                break;
        default:
                return 0;
@@ -66,6 +77,42 @@ static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu,
        }
 }
 
+static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu,
+                                          gpa_t addr, unsigned int len,
+                                          unsigned long val)
+{
+       switch (addr & 0x0c) {
+       case GIC_DIST_IIDR:
+               if (val != vgic_mmio_read_v2_misc(vcpu, addr, len))
+                       return -EINVAL;
+
+               /*
+                * If we observe a write to GICD_IIDR we know that userspace
+                * has been updated and has had a chance to cope with older
+                * kernels (VGICv2 IIDR.Revision == 0) incorrectly reporting
+                * interrupts as group 1, and therefore we now allow groups to
+                * be user writable.  Doing this by default would break
+                * migration from old kernels to new kernels with legacy
+                * userspace.
+                */
+               vcpu->kvm->arch.vgic.v2_groups_user_writable = true;
+               return 0;
+       }
+
+       vgic_mmio_write_v2_misc(vcpu, addr, len, val);
+       return 0;
+}
+
+static int vgic_mmio_uaccess_write_v2_group(struct kvm_vcpu *vcpu,
+                                           gpa_t addr, unsigned int len,
+                                           unsigned long val)
+{
+       if (vcpu->kvm->arch.vgic.v2_groups_user_writable)
+               vgic_mmio_write_group(vcpu, addr, len, val);
+
+       return 0;
+}
+
 static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
                                 gpa_t addr, unsigned int len,
                                 unsigned long val)
@@ -352,17 +399,22 @@ static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu,
 
                if (n > vgic_v3_max_apr_idx(vcpu))
                        return;
+
+               n = array_index_nospec(n, 4);
+
                /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */
                vgicv3->vgic_ap1r[n] = val;
        }
 }
 
 static const struct vgic_register_region vgic_v2_dist_registers[] = {
-       REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL,
-               vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
-               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_DIST_CTRL,
+               vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc,
+               NULL, vgic_mmio_uaccess_write_v2_misc,
+               12, VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,
-               vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
+               vgic_mmio_read_group, vgic_mmio_write_group,
+               NULL, vgic_mmio_uaccess_write_v2_group, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
                vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
index 287784095b5b30a28c380d53f90bfcee977aa434..a2a175b08b172877c0e58c39e4c45db432b970ac 100644 (file)
@@ -59,19 +59,27 @@ bool vgic_supports_direct_msis(struct kvm *kvm)
        return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
 }
 
+/*
+ * The Revision field in the IIDR has the following meaning:
+ *
+ * Revision 2: Interrupt groups are guest-configurable and signaled using
+ *            their configured groups.
+ */
+
 static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
                                            gpa_t addr, unsigned int len)
 {
+       struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
        u32 value = 0;
 
        switch (addr & 0x0c) {
        case GICD_CTLR:
-               if (vcpu->kvm->arch.vgic.enabled)
+               if (vgic->enabled)
                        value |= GICD_CTLR_ENABLE_SS_G1;
                value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
                break;
        case GICD_TYPER:
-               value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+               value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS;
                value = (value >> 5) - 1;
                if (vgic_has_its(vcpu->kvm)) {
                        value |= (INTERRUPT_ID_BITS_ITS - 1) << 19;
@@ -81,7 +89,9 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
                }
                break;
        case GICD_IIDR:
-               value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+               value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) |
+                       (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) |
+                       (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT);
                break;
        default:
                return 0;
@@ -110,6 +120,20 @@ static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
        }
 }
 
+static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu,
+                                          gpa_t addr, unsigned int len,
+                                          unsigned long val)
+{
+       switch (addr & 0x0c) {
+       case GICD_IIDR:
+               if (val != vgic_mmio_read_v3_misc(vcpu, addr, len))
+                       return -EINVAL;
+       }
+
+       vgic_mmio_write_v3_misc(vcpu, addr, len, val);
+       return 0;
+}
+
 static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
                                            gpa_t addr, unsigned int len)
 {
@@ -246,9 +270,9 @@ static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
        return value;
 }
 
-static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
-                                         gpa_t addr, unsigned int len,
-                                         unsigned long val)
+static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
+                                        gpa_t addr, unsigned int len,
+                                        unsigned long val)
 {
        u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
        int i;
@@ -273,6 +297,8 @@ static void vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
 
                vgic_put_irq(vcpu->kvm, irq);
        }
+
+       return 0;
 }
 
 /* We want to avoid outer shareable. */
@@ -444,14 +470,15 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
        }
 
 static const struct vgic_register_region vgic_v3_dist_registers[] = {
-       REGISTER_DESC_WITH_LENGTH(GICD_CTLR,
-               vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16,
-               VGIC_ACCESS_32bit),
+       REGISTER_DESC_WITH_LENGTH_UACCESS(GICD_CTLR,
+               vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc,
+               NULL, vgic_mmio_uaccess_write_v3_misc,
+               16, VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH(GICD_STATUSR,
                vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
-               vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1,
+               vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
                vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1,
@@ -465,7 +492,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = {
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
                vgic_mmio_read_pending, vgic_mmio_write_cpending,
-               vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
+               vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 1,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
                vgic_mmio_read_active, vgic_mmio_write_sactive,
@@ -524,7 +551,7 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
 
 static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
        REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
-               vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
+               vgic_mmio_read_group, vgic_mmio_write_group, 4,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
                vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
@@ -538,7 +565,7 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICPENDR0,
                vgic_mmio_read_pending, vgic_mmio_write_cpending,
-               vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+               vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4,
                VGIC_ACCESS_32bit),
        REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0,
                vgic_mmio_read_active, vgic_mmio_write_sactive,
@@ -873,7 +900,8 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
 /**
  * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
  * @vcpu: The VCPU requesting a SGI
- * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
+ * @reg: The value written into ICC_{ASGI1,SGI0,SGI1}R by that VCPU
+ * @allow_group1: Does the sysreg access allow generation of G1 SGIs
  *
  * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
  * This will trap in sys_regs.c and call this function.
@@ -883,7 +911,7 @@ static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
  * check for matching ones. If this bit is set, we signal all, but not the
  * calling VCPU.
  */
-void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1)
 {
        struct kvm *kvm = vcpu->kvm;
        struct kvm_vcpu *c_vcpu;
@@ -932,9 +960,19 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
                irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
 
                spin_lock_irqsave(&irq->irq_lock, flags);
-               irq->pending_latch = true;
 
-               vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+               /*
+                * An access targeting Group0 SGIs can only generate
+                * those, while an access targeting Group1 SGIs can
+                * generate interrupts of either group.
+                */
+               if (!irq->group || allow_group1) {
+                       irq->pending_latch = true;
+                       vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+               } else {
+                       spin_unlock_irqrestore(&irq->irq_lock, flags);
+               }
+
                vgic_put_irq(vcpu->kvm, irq);
        }
 }
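
The queueing logic above is the core of the Group0 SGI support: a trapped SGI register write may only raise interrupts of the group(s) that the access permits. As an editor's sketch (hypothetical helper name, not part of the patch), the gate it applies boils down to a single predicate:

/*
 * Sketch only: the "!irq->group || allow_group1" test from
 * vgic_v3_dispatch_sgi(), restated as a standalone predicate with
 * hypothetical parameter names.
 */
static bool vgic_sgi_deliverable(bool irq_is_group1, bool allow_group1)
{
        /* Group0 SGIs may always be raised; Group1 only if the access allows it. */
        return !irq_is_group1 || allow_group1;
}

So a Group0-only access leaves Group1 SGIs untouched (the else branch simply drops the lock), while a Group1 access may raise SGIs of either group.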
index ff9655cfeb2f8678558163790de2aeb6ec5841df..f56ff1cf52ec25fc73aa15e4b1f909ef755c95d4 100644 (file)
@@ -40,6 +40,51 @@ void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
        /* Ignore */
 }
 
+int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
+                              unsigned int len, unsigned long val)
+{
+       /* Ignore */
+       return 0;
+}
+
+unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu,
+                                  gpa_t addr, unsigned int len)
+{
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       u32 value = 0;
+       int i;
+
+       /* Loop over all IRQs affected by this read */
+       for (i = 0; i < len * 8; i++) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+               if (irq->group)
+                       value |= BIT(i);
+
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+
+       return value;
+}
+
+void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
+                          unsigned int len, unsigned long val)
+{
+       u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+       int i;
+       unsigned long flags;
+
+       for (i = 0; i < len * 8; i++) {
+               struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+               spin_lock_irqsave(&irq->irq_lock, flags);
+               irq->group = !!(val & BIT(i));
+               vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
+
+               vgic_put_irq(vcpu->kvm, irq);
+       }
+}
+
 /*
  * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
  * of the enabled bit, so there is only one function for both here.
@@ -363,11 +408,12 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
        mutex_unlock(&vcpu->kvm->lock);
 }
 
-void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
+int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
                                     gpa_t addr, unsigned int len,
                                     unsigned long val)
 {
        __vgic_mmio_write_cactive(vcpu, addr, len, val);
+       return 0;
 }
 
 static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
@@ -399,11 +445,12 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
        mutex_unlock(&vcpu->kvm->lock);
 }
 
-void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
+int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
                                     gpa_t addr, unsigned int len,
                                     unsigned long val)
 {
        __vgic_mmio_write_sactive(vcpu, addr, len, val);
+       return 0;
 }
 
 unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
@@ -735,10 +782,9 @@ static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
 
        r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
        if (region->uaccess_write)
-               region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
-       else
-               region->write(r_vcpu, addr, sizeof(u32), *val);
+               return region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
 
+       region->write(r_vcpu, addr, sizeof(u32), *val);
        return 0;
 }
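
The new vgic_mmio_read_group()/vgic_mmio_write_group() helpers above walk one configuration bit per interrupt, which is why both loops run to len * 8. As a hedged illustration (hypothetical helper, not a kernel macro), the byte offset and bit position of an IGROUPR access map to an INTID like this:

/*
 * Illustration only, not a kernel macro: with one group bit per
 * interrupt, byte 'offset' into GICD_IGROUPR / GICR_IGROUPR0 covers
 * eight INTIDs, and bit 'bit' of an access at that offset selects
 * INTID offset * 8 + bit.
 */
static unsigned int igroupr_bit_to_intid(unsigned int offset, unsigned int bit)
{
        return offset * 8 + bit;        /* e.g. offset 4, bit 3 -> INTID 35 */
}

A set bit marks the corresponding interrupt as Group1 (irq->group = 1); a clear bit makes it Group0.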
 
index 5693f6df45ecd57e5d7b28aecc1579f87cdcd91e..a07f90acdaecb4af617326912a7a2edab1c83425 100644 (file)
@@ -37,8 +37,8 @@ struct vgic_register_region {
        unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr,
                                      unsigned int len);
        union {
-               void (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
-                                     unsigned int len, unsigned long val);
+               int (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr,
+                                    unsigned int len, unsigned long val);
                int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its,
                                         gpa_t addr, unsigned int len,
                                         unsigned long val);
@@ -134,6 +134,15 @@ unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
 void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
                        unsigned int len, unsigned long val);
 
+int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
+                              unsigned int len, unsigned long val);
+
+unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, gpa_t addr,
+                                  unsigned int len);
+
+void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
+                          unsigned int len, unsigned long val);
+
 unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
                                    gpa_t addr, unsigned int len);
 
@@ -167,13 +176,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
                             gpa_t addr, unsigned int len,
                             unsigned long val);
 
-void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
-                                    gpa_t addr, unsigned int len,
-                                    unsigned long val);
+int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
+                                   gpa_t addr, unsigned int len,
+                                   unsigned long val);
 
-void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
-                                    gpa_t addr, unsigned int len,
-                                    unsigned long val);
+int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
+                                   gpa_t addr, unsigned int len,
+                                   unsigned long val);
 
 unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
                                      gpa_t addr, unsigned int len);
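
The prototype changes above make every userspace-access write handler return a status, so an invalid restore can now fail the operation instead of being silently accepted; the GICD_IIDR check earlier in this series is the first user. A hedged sketch of a handler written against the new prototype, with hypothetical names:

/*
 * Sketch only, hypothetical names: reject a userspace write whose value
 * does not round-trip what the matching read handler reports. A nonzero
 * return is propagated by vgic_uaccess_write() back toward the ioctl caller.
 */
static int demo_uaccess_write(struct kvm_vcpu *vcpu, gpa_t addr,
                              unsigned int len, unsigned long val)
{
        if (val != demo_uaccess_read(vcpu, addr, len))  /* hypothetical reader */
                return -EINVAL;
        return 0;
}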
index a5f2e44f1c33d42ad2693d63b8142665864e64a6..69b892abd7dc6faec17e820fe6f4c0f95800ecb3 100644 (file)
@@ -62,7 +62,8 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2;
        int lr;
-       unsigned long flags;
+
+       DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 
        cpuif->vgic_hcr &= ~GICH_HCR_UIE;
 
@@ -83,7 +84,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
 
                irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
 
-               spin_lock_irqsave(&irq->irq_lock, flags);
+               spin_lock(&irq->irq_lock);
 
                /* Always preserve the active bit */
                irq->active = !!(val & GICH_LR_ACTIVE_BIT);
@@ -126,7 +127,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
                                vgic_irq_set_phys_active(irq, false);
                }
 
-               spin_unlock_irqrestore(&irq->irq_lock, flags);
+               spin_unlock(&irq->irq_lock);
                vgic_put_irq(vcpu->kvm, irq);
        }
 
@@ -159,6 +160,9 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
                }
        }
 
+       if (irq->group)
+               val |= GICH_LR_GROUP1;
+
        if (irq->hw) {
                val |= GICH_LR_HW;
                val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
index cdce653e3c47fb31b9eb0ccf73c3bebd830d8496..9c0dd234ebe8112f601e81083de436d5547b5c55 100644 (file)
@@ -46,7 +46,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
        struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3;
        u32 model = vcpu->kvm->arch.vgic.vgic_model;
        int lr;
-       unsigned long flags;
+
+       DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 
        cpuif->vgic_hcr &= ~ICH_HCR_UIE;
 
@@ -75,7 +76,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
                if (!irq)       /* An LPI could have been unmapped. */
                        continue;
 
-               spin_lock_irqsave(&irq->irq_lock, flags);
+               spin_lock(&irq->irq_lock);
 
                /* Always preserve the active bit */
                irq->active = !!(val & ICH_LR_ACTIVE_BIT);
@@ -118,7 +119,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
                                vgic_irq_set_phys_active(irq, false);
                }
 
-               spin_unlock_irqrestore(&irq->irq_lock, flags);
+               spin_unlock(&irq->irq_lock);
                vgic_put_irq(vcpu->kvm, irq);
        }
 
@@ -197,11 +198,7 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
        if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT))
                irq->line_level = false;
 
-       /*
-        * We currently only support Group1 interrupts, which is a
-        * known defect. This needs to be addressed at some point.
-        */
-       if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
+       if (irq->group)
                val |= ICH_LR_GROUP;
 
        val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
index 33c8325c8f35662c03c37319605d28e2f2f9d7a4..7cfdfbc910e0c48e38b9f6eaaefacbbe71d043fc 100644 (file)
 #define CREATE_TRACE_POINTS
 #include "trace.h"
 
-#ifdef CONFIG_DEBUG_SPINLOCK
-#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
-#else
-#define DEBUG_SPINLOCK_BUG_ON(p)
-#endif
-
 struct vgic_global kvm_vgic_global_state __ro_after_init = {
        .gicv3_cpuif = STATIC_KEY_FALSE_INIT,
 };
@@ -599,10 +593,11 @@ static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_irq *irq, *tmp;
-       unsigned long flags;
+
+       DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());
 
 retry:
-       spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);
+       spin_lock(&vgic_cpu->ap_list_lock);
 
        list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
                struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
@@ -643,7 +638,7 @@ retry:
                /* This interrupt looks like it has to be migrated. */
 
                spin_unlock(&irq->irq_lock);
-               spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
+               spin_unlock(&vgic_cpu->ap_list_lock);
 
                /*
                 * Ensure locking order by always locking the smallest
@@ -657,7 +652,7 @@ retry:
                        vcpuB = vcpu;
                }
 
-               spin_lock_irqsave(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
+               spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
                spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
                                 SINGLE_DEPTH_NESTING);
                spin_lock(&irq->irq_lock);
@@ -682,7 +677,7 @@ retry:
 
                spin_unlock(&irq->irq_lock);
                spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
-               spin_unlock_irqrestore(&vcpuA->arch.vgic_cpu.ap_list_lock, flags);
+               spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
 
                if (target_vcpu_needs_kick) {
                        kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu);
@@ -692,7 +687,7 @@ retry:
                goto retry;
        }
 
-       spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags);
+       spin_unlock(&vgic_cpu->ap_list_lock);
 }
 
 static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
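
The conversions above (vgic_v2_fold_lr_state(), vgic_v3_fold_lr_state() and vgic_prune_ap_list()) all follow the same pattern: these paths only run with interrupts already disabled, so the flags dance of spin_lock_irqsave() is dropped in favour of a plain spin_lock() guarded by an assertion. A sketch of the convention (hypothetical function, not from the patch):

/*
 * Sketch only: the locking convention adopted by the fold/prune paths.
 * DEBUG_SPINLOCK_BUG_ON() is the helper this series moves into the
 * shared vgic header below.
 */
static void demo_fold_path(spinlock_t *lock)
{
        DEBUG_SPINLOCK_BUG_ON(!irqs_disabled());        /* caller's contract */

        spin_lock(lock);        /* irqs already off, no flags to save */
        /* ... fold LR state or prune the AP list under the lock ... */
        spin_unlock(lock);
}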
index ead00b2072b260321c31330e20a8031b5ba362d8..a90024718ca44b941e8b4d7aa0f6bf20233c4596 100644 (file)
 #define KVM_VGIC_V3_RDIST_COUNT_MASK   GENMASK_ULL(63, 52)
 #define KVM_VGIC_V3_RDIST_COUNT_SHIFT  52
 
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
+#else
+#define DEBUG_SPINLOCK_BUG_ON(p)
+#endif
+
 /* Requires the irq_lock to be held by the caller. */
 static inline bool irq_is_pending(struct vgic_irq *irq)
 {
@@ -305,6 +311,7 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
                (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
 }
 
+int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
 int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
                         u32 devid, u32 eventid, struct vgic_irq **irq);
 struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);