1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  *          Joerg Roedel <jroedel@suse.de>
19  */
20
21 #define pr_fmt(fmt)     "DMAR: " fmt
22
23 #include <linux/init.h>
24 #include <linux/bitmap.h>
25 #include <linux/debugfs.h>
26 #include <linux/export.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/dma-direct.h>
35 #include <linux/mempool.h>
36 #include <linux/memory.h>
37 #include <linux/cpu.h>
38 #include <linux/timer.h>
39 #include <linux/io.h>
40 #include <linux/iova.h>
41 #include <linux/iommu.h>
42 #include <linux/intel-iommu.h>
43 #include <linux/syscore_ops.h>
44 #include <linux/tboot.h>
45 #include <linux/dmi.h>
46 #include <linux/pci-ats.h>
47 #include <linux/memblock.h>
48 #include <linux/dma-contiguous.h>
50 #include <linux/crash_dump.h>
51 #include <asm/irq_remapping.h>
52 #include <asm/cacheflush.h>
53 #include <asm/iommu.h>
54
55 #include "irq_remapping.h"
56
57 #define ROOT_SIZE               VTD_PAGE_SIZE
58 #define CONTEXT_SIZE            VTD_PAGE_SIZE
59
60 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
61 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
62 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
63 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
64
65 #define IOAPIC_RANGE_START      (0xfee00000)
66 #define IOAPIC_RANGE_END        (0xfeefffff)
67 #define IOVA_START_ADDR         (0x1000)
68
69 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
70
71 #define MAX_AGAW_WIDTH 64
72 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
73
74 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
75 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
76
77 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
78    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
79 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
80                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
81 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
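
/*
 * Worked example (illustrative, not part of the original file): with
 * VTD_PAGE_SHIFT == 12 and a 48-bit guest address width,
 * __DOMAIN_MAX_PFN(48) == (1ULL << 36) - 1, which fits comfortably in an
 * unsigned long on 64-bit, and DOMAIN_MAX_ADDR(48) is that pfn shifted
 * back up by VTD_PAGE_SHIFT, i.e. the base of the highest 4KiB page in
 * the 48-bit DMA address space.
 */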
82
83 /* IO virtual address start page frame number */
84 #define IOVA_START_PFN          (1)
85
86 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
87
88 /* page table handling */
89 #define LEVEL_STRIDE            (9)
90 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
91
92 /*
93  * This bitmap is used to advertise the page sizes our hardware supports
94  * to the IOMMU core, which will then use this information to split
95  * physically contiguous memory regions it is mapping into page sizes
96  * that we support.
97  *
98  * Traditionally the IOMMU core just handed us the mappings directly,
99  * after making sure the size was a power-of-two multiple of 4KiB and
100  * that the mapping was naturally aligned.
101  *
102  * To retain this behavior, we currently advertise that we support
103  * all page sizes that are a power-of-two multiple of 4KiB.
104  *
105  * If at some point we'd like to utilize the IOMMU core's new behavior,
106  * we could change this to advertise the real page sizes we support.
107  */
108 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
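
/*
 * Illustrative sketch, not part of the original driver: a hypothetical
 * helper showing what the INTEL_IOMMU_PGSIZES mask above advertises
 * (assumes is_power_of_2() from <linux/log2.h> is available here). Any
 * power-of-two size of at least 4KiB has its bit set, so the IOMMU core
 * only ever hands us naturally aligned regions whose size is a 4KiB order.
 */
static inline bool __maybe_unused intel_iommu_pgsize_advertised(unsigned long size)
{
	return is_power_of_2(size) && (size & INTEL_IOMMU_PGSIZES);
}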
109
110 static inline int agaw_to_level(int agaw)
111 {
112         return agaw + 2;
113 }
114
115 static inline int agaw_to_width(int agaw)
116 {
117         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
118 }
119
120 static inline int width_to_agaw(int width)
121 {
122         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
123 }
124
125 static inline unsigned int level_to_offset_bits(int level)
126 {
127         return (level - 1) * LEVEL_STRIDE;
128 }
129
130 static inline int pfn_level_offset(unsigned long pfn, int level)
131 {
132         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
133 }
134
135 static inline unsigned long level_mask(int level)
136 {
137         return -1UL << level_to_offset_bits(level);
138 }
139
140 static inline unsigned long level_size(int level)
141 {
142         return 1UL << level_to_offset_bits(level);
143 }
144
145 static inline unsigned long align_to_level(unsigned long pfn, int level)
146 {
147         return (pfn + level_size(level) - 1) & level_mask(level);
148 }
149
150 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
151 {
152         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
153 }
154
155 /* VT-d pages must never be _larger_ than MM pages. Otherwise things
156    are never going to work. */
157 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
158 {
159         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
160 }
161
162 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
163 {
164         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
165 }
166 static inline unsigned long page_to_dma_pfn(struct page *pg)
167 {
168         return mm_to_dma_pfn(page_to_pfn(pg));
169 }
170 static inline unsigned long virt_to_dma_pfn(void *p)
171 {
172         return page_to_dma_pfn(virt_to_page(p));
173 }
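
/*
 * Worked example (illustrative, not part of the original file): on x86,
 * PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so the conversions above shift by
 * zero and are the identity. On a kernel built with 64KiB MM pages, one
 * mm pfn would cover 1 << (16 - 12) == 16 DMA pfns.
 */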
174
175 /* global iommu list, set NULL for ignored DMAR units */
176 static struct intel_iommu **g_iommus;
177
178 static void __init check_tylersburg_isoch(void);
179 static int rwbf_quirk;
180
181 /*
182  * set to 1 to panic kernel if can't successfully enable VT-d
183  * (used when kernel is launched w/ TXT)
184  */
185 static int force_on = 0;
186 int intel_iommu_tboot_noforce;
187
188 /*
189  * 0: Present
190  * 1-11: Reserved
191  * 12-63: Context Ptr (12 - (haw-1))
192  * 64-127: Reserved
193  */
194 struct root_entry {
195         u64     lo;
196         u64     hi;
197 };
198 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
199
200 /*
201  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
202  * if marked present.
203  */
204 static phys_addr_t root_entry_lctp(struct root_entry *re)
205 {
206         if (!(re->lo & 1))
207                 return 0;
208
209         return re->lo & VTD_PAGE_MASK;
210 }
211
212 /*
213  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
214  * if marked present.
215  */
216 static phys_addr_t root_entry_uctp(struct root_entry *re)
217 {
218         if (!(re->hi & 1))
219                 return 0;
220
221         return re->hi & VTD_PAGE_MASK;
222 }
223 /*
224  * low 64 bits:
225  * 0: present
226  * 1: fault processing disable
227  * 2-3: translation type
228  * 12-63: address space root
229  * high 64 bits:
230  * 0-2: address width
231  * 3-6: aval
232  * 8-23: domain id
233  */
234 struct context_entry {
235         u64 lo;
236         u64 hi;
237 };
238
239 static inline void context_clear_pasid_enable(struct context_entry *context)
240 {
241         context->lo &= ~(1ULL << 11);
242 }
243
244 static inline bool context_pasid_enabled(struct context_entry *context)
245 {
246         return !!(context->lo & (1ULL << 11));
247 }
248
249 static inline void context_set_copied(struct context_entry *context)
250 {
251         context->hi |= (1ull << 3);
252 }
253
254 static inline bool context_copied(struct context_entry *context)
255 {
256         return !!(context->hi & (1ULL << 3));
257 }
258
259 static inline bool __context_present(struct context_entry *context)
260 {
261         return (context->lo & 1);
262 }
263
264 static inline bool context_present(struct context_entry *context)
265 {
266         return context_pasid_enabled(context) ?
267              __context_present(context) :
268              __context_present(context) && !context_copied(context);
269 }
270
271 static inline void context_set_present(struct context_entry *context)
272 {
273         context->lo |= 1;
274 }
275
276 static inline void context_set_fault_enable(struct context_entry *context)
277 {
278         context->lo &= (((u64)-1) << 2) | 1;
279 }
280
281 static inline void context_set_translation_type(struct context_entry *context,
282                                                 unsigned long value)
283 {
284         context->lo &= (((u64)-1) << 4) | 3;
285         context->lo |= (value & 3) << 2;
286 }
287
288 static inline void context_set_address_root(struct context_entry *context,
289                                             unsigned long value)
290 {
291         context->lo &= ~VTD_PAGE_MASK;
292         context->lo |= value & VTD_PAGE_MASK;
293 }
294
295 static inline void context_set_address_width(struct context_entry *context,
296                                              unsigned long value)
297 {
298         context->hi |= value & 7;
299 }
300
301 static inline void context_set_domain_id(struct context_entry *context,
302                                          unsigned long value)
303 {
304         context->hi |= (value & ((1 << 16) - 1)) << 8;
305 }
306
307 static inline int context_domain_id(struct context_entry *c)
308 {
309         return((c->hi >> 8) & 0xffff);
310 }
311
312 static inline void context_clear_entry(struct context_entry *context)
313 {
314         context->lo = 0;
315         context->hi = 0;
316 }
317
318 /*
319  * 0: readable
320  * 1: writable
321  * 2-6: reserved
322  * 7: super page
323  * 8-10: available
324  * 11: snoop behavior
325  * 12-63: Host physical address
326  */
327 struct dma_pte {
328         u64 val;
329 };
330
331 static inline void dma_clear_pte(struct dma_pte *pte)
332 {
333         pte->val = 0;
334 }
335
336 static inline u64 dma_pte_addr(struct dma_pte *pte)
337 {
338 #ifdef CONFIG_64BIT
339         return pte->val & VTD_PAGE_MASK;
340 #else
341         /* Must have a full atomic 64-bit read */
342         return  __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
343 #endif
344 }
345
346 static inline bool dma_pte_present(struct dma_pte *pte)
347 {
348         return (pte->val & 3) != 0;
349 }
350
351 static inline bool dma_pte_superpage(struct dma_pte *pte)
352 {
353         return (pte->val & DMA_PTE_LARGE_PAGE);
354 }
355
356 static inline int first_pte_in_page(struct dma_pte *pte)
357 {
358         return !((unsigned long)pte & ~VTD_PAGE_MASK);
359 }
360
361 /*
362  * This domain is a static identity mapping domain.
363  *      1. This domain creates a static 1:1 mapping to all usable memory.
364  *      2. It maps to each iommu if successful.
365  *      3. Each iommu maps to this domain if successful.
366  */
367 static struct dmar_domain *si_domain;
368 static int hw_pass_through = 1;
369
370 /*
371  * Domain represents a virtual machine; more than one device
372  * across iommus may be owned by one domain, e.g. a kvm guest.
373  */
374 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
375
376 /* si_domain contains multiple devices */
377 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
378
379 #define for_each_domain_iommu(idx, domain)                      \
380         for (idx = 0; idx < g_num_of_iommus; idx++)             \
381                 if (domain->iommu_refcnt[idx])
382
383 struct dmar_domain {
384         int     nid;                    /* node id */
385
386         unsigned        iommu_refcnt[DMAR_UNITS_SUPPORTED];
387                                         /* Refcount of devices per iommu */
388
389
390         u16             iommu_did[DMAR_UNITS_SUPPORTED];
391                                         /* Domain ids per IOMMU. Use u16 since
392                                          * domain ids are 16 bit wide according
393                                          * to VT-d spec, section 9.3 */
394
395         bool has_iotlb_device;
396         struct list_head devices;       /* all devices' list */
397         struct iova_domain iovad;       /* iova's that belong to this domain */
398
399         struct dma_pte  *pgd;           /* virtual address */
400         int             gaw;            /* max guest address width */
401
402         /* adjusted guest address width, 0 is level 2 30-bit */
403         int             agaw;
404
405         int             flags;          /* flags to find out type of domain */
406
407         int             iommu_coherency;/* indicate coherency of iommu access */
408         int             iommu_snooping; /* indicate snooping control feature */
409         int             iommu_count;    /* reference count of iommu */
410         int             iommu_superpage;/* Level of superpages supported:
411                                            0 == 4KiB (no superpages), 1 == 2MiB,
412                                            2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
413         u64             max_addr;       /* maximum mapped address */
414
415         struct iommu_domain domain;     /* generic domain data structure for
416                                            iommu core */
417 };
418
419 /* PCI domain-device relationship */
420 struct device_domain_info {
421         struct list_head link;  /* link to domain siblings */
422         struct list_head global; /* link to global list */
423         u8 bus;                 /* PCI bus number */
424         u8 devfn;               /* PCI devfn number */
425         u8 pasid_supported:3;
426         u8 pasid_enabled:1;
427         u8 pri_supported:1;
428         u8 pri_enabled:1;
429         u8 ats_supported:1;
430         u8 ats_enabled:1;
431         u8 ats_qdep;
432         struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
433         struct intel_iommu *iommu; /* IOMMU used by this device */
434         struct dmar_domain *domain; /* pointer to domain */
435 };
436
437 struct dmar_rmrr_unit {
438         struct list_head list;          /* list of rmrr units   */
439         struct acpi_dmar_header *hdr;   /* ACPI header          */
440         u64     base_address;           /* reserved base address*/
441         u64     end_address;            /* reserved end address */
442         struct dmar_dev_scope *devices; /* target devices */
443         int     devices_cnt;            /* target device count */
444         struct iommu_resv_region *resv; /* reserved region handle */
445 };
446
447 struct dmar_atsr_unit {
448         struct list_head list;          /* list of ATSR units */
449         struct acpi_dmar_header *hdr;   /* ACPI header */
450         struct dmar_dev_scope *devices; /* target devices */
451         int devices_cnt;                /* target device count */
452         u8 include_all:1;               /* include all ports */
453 };
454
455 static LIST_HEAD(dmar_atsr_units);
456 static LIST_HEAD(dmar_rmrr_units);
457
458 #define for_each_rmrr_units(rmrr) \
459         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
460
461 /* number of IOMMUs; used to size and index g_iommus */
462 static int g_num_of_iommus;
463
464 static void domain_exit(struct dmar_domain *domain);
465 static void domain_remove_dev_info(struct dmar_domain *domain);
466 static void dmar_remove_one_dev_info(struct dmar_domain *domain,
467                                      struct device *dev);
468 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
469 static void domain_context_clear(struct intel_iommu *iommu,
470                                  struct device *dev);
471 static int domain_detach_iommu(struct dmar_domain *domain,
472                                struct intel_iommu *iommu);
473
474 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
475 int dmar_disabled = 0;
476 #else
477 int dmar_disabled = 1;
478 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
479
480 int intel_iommu_enabled = 0;
481 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
482
483 static int dmar_map_gfx = 1;
484 static int dmar_forcedac;
485 static int intel_iommu_strict;
486 static int intel_iommu_superpage = 1;
487 static int intel_iommu_ecs = 1;
488 static int iommu_identity_mapping;
489
490 #define IDENTMAP_ALL            1
491 #define IDENTMAP_GFX            2
492 #define IDENTMAP_AZALIA         4
493
494 #define ecs_enabled(iommu)      (intel_iommu_ecs && ecap_ecs(iommu->ecap))
495 #define pasid_enabled(iommu)    (ecs_enabled(iommu) && ecap_pasid(iommu->ecap))
496
497 int intel_iommu_gfx_mapped;
498 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
499
500 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
501 static DEFINE_SPINLOCK(device_domain_lock);
502 static LIST_HEAD(device_domain_list);
503
504 const struct iommu_ops intel_iommu_ops;
505
506 static bool translation_pre_enabled(struct intel_iommu *iommu)
507 {
508         return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
509 }
510
511 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
512 {
513         iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
514 }
515
516 static void init_translation_status(struct intel_iommu *iommu)
517 {
518         u32 gsts;
519
520         gsts = readl(iommu->reg + DMAR_GSTS_REG);
521         if (gsts & DMA_GSTS_TES)
522                 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
523 }
524
525 /* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
526 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
527 {
528         return container_of(dom, struct dmar_domain, domain);
529 }
530
531 static int __init intel_iommu_setup(char *str)
532 {
533         if (!str)
534                 return -EINVAL;
535         while (*str) {
536                 if (!strncmp(str, "on", 2)) {
537                         dmar_disabled = 0;
538                         pr_info("IOMMU enabled\n");
539                 } else if (!strncmp(str, "off", 3)) {
540                         dmar_disabled = 1;
541                         pr_info("IOMMU disabled\n");
542                 } else if (!strncmp(str, "igfx_off", 8)) {
543                         dmar_map_gfx = 0;
544                         pr_info("Disable GFX device mapping\n");
545                 } else if (!strncmp(str, "forcedac", 8)) {
546                         pr_info("Forcing DAC for PCI devices\n");
547                         dmar_forcedac = 1;
548                 } else if (!strncmp(str, "strict", 6)) {
549                         pr_info("Disable batched IOTLB flush\n");
550                         intel_iommu_strict = 1;
551                 } else if (!strncmp(str, "sp_off", 6)) {
552                         pr_info("Disable superpage support\n");
553                         intel_iommu_superpage = 0;
554                 } else if (!strncmp(str, "ecs_off", 7)) {
555                         printk(KERN_INFO
556                                 "Intel-IOMMU: disable extended context table support\n");
557                         intel_iommu_ecs = 0;
558                 } else if (!strncmp(str, "tboot_noforce", 13)) {
559                         printk(KERN_INFO
560                                 "Intel-IOMMU: not forcing on after tboot. This could expose a security risk for tboot\n");
561                         intel_iommu_tboot_noforce = 1;
562                 }
563
564                 str += strcspn(str, ",");
565                 while (*str == ',')
566                         str++;
567         }
568         return 0;
569 }
570 __setup("intel_iommu=", intel_iommu_setup);
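
/*
 * Usage sketch (illustrative, not part of the original file): the options
 * parsed above are comma separated on the kernel command line, e.g.
 *
 *     intel_iommu=on,strict,sp_off
 *
 * which enables the IOMMU, disables batched IOTLB flushing and disables
 * superpage support.
 */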
571
572 static struct kmem_cache *iommu_domain_cache;
573 static struct kmem_cache *iommu_devinfo_cache;
574
575 static struct dmar_domain *get_iommu_domain(struct intel_iommu *iommu, u16 did)
576 {
577         struct dmar_domain **domains;
578         int idx = did >> 8;
579
580         domains = iommu->domains[idx];
581         if (!domains)
582                 return NULL;
583
584         return domains[did & 0xff];
585 }
586
587 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
588                              struct dmar_domain *domain)
589 {
590         struct dmar_domain **domains;
591         int idx = did >> 8;
592
593         if (!iommu->domains[idx]) {
594                 size_t size = 256 * sizeof(struct dmar_domain *);
595                 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
596         }
597
598         domains = iommu->domains[idx];
599         if (WARN_ON(!domains))
600                 return;
601
602         domains[did & 0xff] = domain;
603 }
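
/*
 * Worked example (illustrative, not part of the original file): domain IDs
 * are kept in a two-level table of 256-entry pages. For did 0x0123,
 * get_iommu_domain() reads iommu->domains[0x01][0x23]; set_iommu_domain()
 * allocates the 256-pointer second-level page on first use.
 */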
604
605 static inline void *alloc_pgtable_page(int node)
606 {
607         struct page *page;
608         void *vaddr = NULL;
609
610         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
611         if (page)
612                 vaddr = page_address(page);
613         return vaddr;
614 }
615
616 static inline void free_pgtable_page(void *vaddr)
617 {
618         free_page((unsigned long)vaddr);
619 }
620
621 static inline void *alloc_domain_mem(void)
622 {
623         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
624 }
625
626 static void free_domain_mem(void *vaddr)
627 {
628         kmem_cache_free(iommu_domain_cache, vaddr);
629 }
630
631 static inline void *alloc_devinfo_mem(void)
632 {
633         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
634 }
635
636 static inline void free_devinfo_mem(void *vaddr)
637 {
638         kmem_cache_free(iommu_devinfo_cache, vaddr);
639 }
640
641 static inline int domain_type_is_vm(struct dmar_domain *domain)
642 {
643         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
644 }
645
646 static inline int domain_type_is_si(struct dmar_domain *domain)
647 {
648         return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
649 }
650
651 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
652 {
653         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
654                                 DOMAIN_FLAG_STATIC_IDENTITY);
655 }
656
657 static inline int domain_pfn_supported(struct dmar_domain *domain,
658                                        unsigned long pfn)
659 {
660         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
661
662         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
663 }
664
665 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
666 {
667         unsigned long sagaw;
668         int agaw = -1;
669
670         sagaw = cap_sagaw(iommu->cap);
671         for (agaw = width_to_agaw(max_gaw);
672              agaw >= 0; agaw--) {
673                 if (test_bit(agaw, &sagaw))
674                         break;
675         }
676
677         return agaw;
678 }
679
680 /*
681  * Calculate max SAGAW for each iommu.
682  */
683 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
684 {
685         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
686 }
687
688 /*
689  * Calculate agaw for each iommu.
690  * "SAGAW" may be different across iommus: use a default agaw and fall
691  * back to a smaller supported agaw for iommus that don't support the default.
692  */
693 int iommu_calculate_agaw(struct intel_iommu *iommu)
694 {
695         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
696 }
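
/*
 * Worked example (illustrative, not part of the original file): a 48-bit
 * guest address width gives agaw = DIV_ROUND_UP(48 - 30, 9) = 2, i.e.
 * agaw_to_level(2) == 4 page-table levels and agaw_to_width(2) == 48.
 * The default width of 57 would give agaw 3 (a 5-level table), and is
 * only chosen when the IOMMU's SAGAW capability has that bit set.
 */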
697
698 /* This function only returns a single iommu in a domain */
699 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
700 {
701         int iommu_id;
702
703         /* si_domain and vm domain should not get here. */
704         BUG_ON(domain_type_is_vm_or_si(domain));
705         for_each_domain_iommu(iommu_id, domain)
706                 break;
707
708         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
709                 return NULL;
710
711         return g_iommus[iommu_id];
712 }
713
714 static void domain_update_iommu_coherency(struct dmar_domain *domain)
715 {
716         struct dmar_drhd_unit *drhd;
717         struct intel_iommu *iommu;
718         bool found = false;
719         int i;
720
721         domain->iommu_coherency = 1;
722
723         for_each_domain_iommu(i, domain) {
724                 found = true;
725                 if (!ecap_coherent(g_iommus[i]->ecap)) {
726                         domain->iommu_coherency = 0;
727                         break;
728                 }
729         }
730         if (found)
731                 return;
732
733         /* No hardware attached; use lowest common denominator */
734         rcu_read_lock();
735         for_each_active_iommu(iommu, drhd) {
736                 if (!ecap_coherent(iommu->ecap)) {
737                         domain->iommu_coherency = 0;
738                         break;
739                 }
740         }
741         rcu_read_unlock();
742 }
743
744 static int domain_update_iommu_snooping(struct intel_iommu *skip)
745 {
746         struct dmar_drhd_unit *drhd;
747         struct intel_iommu *iommu;
748         int ret = 1;
749
750         rcu_read_lock();
751         for_each_active_iommu(iommu, drhd) {
752                 if (iommu != skip) {
753                         if (!ecap_sc_support(iommu->ecap)) {
754                                 ret = 0;
755                                 break;
756                         }
757                 }
758         }
759         rcu_read_unlock();
760
761         return ret;
762 }
763
764 static int domain_update_iommu_superpage(struct intel_iommu *skip)
765 {
766         struct dmar_drhd_unit *drhd;
767         struct intel_iommu *iommu;
768         int mask = 0xf;
769
770         if (!intel_iommu_superpage) {
771                 return 0;
772         }
773
774         /* set iommu_superpage to the smallest common denominator */
775         rcu_read_lock();
776         for_each_active_iommu(iommu, drhd) {
777                 if (iommu != skip) {
778                         mask &= cap_super_page_val(iommu->cap);
779                         if (!mask)
780                                 break;
781                 }
782         }
783         rcu_read_unlock();
784
785         return fls(mask);
786 }
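
/*
 * Worked example (illustrative, not part of the original file): if every
 * active IOMMU advertises 2MiB and 1GiB superpages, cap_super_page_val()
 * leaves bits 0 and 1 set in the local mask, and fls(0x3) == 2 selects
 * the 1GiB level (see the iommu_superpage encoding in struct dmar_domain
 * above).
 */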
787
788 /* Some capabilities may be different across iommus */
789 static void domain_update_iommu_cap(struct dmar_domain *domain)
790 {
791         domain_update_iommu_coherency(domain);
792         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
793         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
794 }
795
796 static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
797                                                        u8 bus, u8 devfn, int alloc)
798 {
799         struct root_entry *root = &iommu->root_entry[bus];
800         struct context_entry *context;
801         u64 *entry;
802
803         entry = &root->lo;
804         if (ecs_enabled(iommu)) {
805                 if (devfn >= 0x80) {
806                         devfn -= 0x80;
807                         entry = &root->hi;
808                 }
809                 devfn *= 2;
810         }
811         if (*entry & 1)
812                 context = phys_to_virt(*entry & VTD_PAGE_MASK);
813         else {
814                 unsigned long phy_addr;
815                 if (!alloc)
816                         return NULL;
817
818                 context = alloc_pgtable_page(iommu->node);
819                 if (!context)
820                         return NULL;
821
822                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
823                 phy_addr = virt_to_phys((void *)context);
824                 *entry = phy_addr | 1;
825                 __iommu_flush_cache(iommu, entry, sizeof(*entry));
826         }
827         return &context[devfn];
828 }
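
/*
 * Worked example (illustrative, not part of the original file): with
 * extended context support (ecs_enabled()), each root entry covers two
 * context tables of 128 extended entries each. A lookup for devfn 0x85
 * lands in the upper table (&root->hi) at index (0x85 - 0x80) * 2 == 0x0a;
 * in legacy mode devfn indexes a single 256-entry table directly.
 */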
829
830 static int iommu_dummy(struct device *dev)
831 {
832         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
833 }
834
835 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
836 {
837         struct dmar_drhd_unit *drhd = NULL;
838         struct intel_iommu *iommu;
839         struct device *tmp;
840         struct pci_dev *ptmp, *pdev = NULL;
841         u16 segment = 0;
842         int i;
843
844         if (iommu_dummy(dev))
845                 return NULL;
846
847         if (dev_is_pci(dev)) {
848                 struct pci_dev *pf_pdev;
849
850                 pdev = to_pci_dev(dev);
851
852 #ifdef CONFIG_X86
853                 /* VMD child devices currently cannot be handled individually */
854                 if (is_vmd(pdev->bus))
855                         return NULL;
856 #endif
857
858                 /* VFs aren't listed in scope tables; we need to look up
859                  * the PF instead to find the IOMMU. */
860                 pf_pdev = pci_physfn(pdev);
861                 dev = &pf_pdev->dev;
862                 segment = pci_domain_nr(pdev->bus);
863         } else if (has_acpi_companion(dev))
864                 dev = &ACPI_COMPANION(dev)->dev;
865
866         rcu_read_lock();
867         for_each_active_iommu(iommu, drhd) {
868                 if (pdev && segment != drhd->segment)
869                         continue;
870
871                 for_each_active_dev_scope(drhd->devices,
872                                           drhd->devices_cnt, i, tmp) {
873                         if (tmp == dev) {
874                                 /* For a VF use its original BDF# not that of the PF
875                                  * which we used for the IOMMU lookup. Strictly speaking
876                                  * we could do this for all PCI devices; we only need to
877                                  * get the BDF# from the scope table for ACPI matches. */
878                                 if (pdev && pdev->is_virtfn)
879                                         goto got_pdev;
880
881                                 *bus = drhd->devices[i].bus;
882                                 *devfn = drhd->devices[i].devfn;
883                                 goto out;
884                         }
885
886                         if (!pdev || !dev_is_pci(tmp))
887                                 continue;
888
889                         ptmp = to_pci_dev(tmp);
890                         if (ptmp->subordinate &&
891                             ptmp->subordinate->number <= pdev->bus->number &&
892                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
893                                 goto got_pdev;
894                 }
895
896                 if (pdev && drhd->include_all) {
897                 got_pdev:
898                         *bus = pdev->bus->number;
899                         *devfn = pdev->devfn;
900                         goto out;
901                 }
902         }
903         iommu = NULL;
904  out:
905         rcu_read_unlock();
906
907         return iommu;
908 }
909
910 static void domain_flush_cache(struct dmar_domain *domain,
911                                void *addr, int size)
912 {
913         if (!domain->iommu_coherency)
914                 clflush_cache_range(addr, size);
915 }
916
917 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
918 {
919         struct context_entry *context;
920         int ret = 0;
921         unsigned long flags;
922
923         spin_lock_irqsave(&iommu->lock, flags);
924         context = iommu_context_addr(iommu, bus, devfn, 0);
925         if (context)
926                 ret = context_present(context);
927         spin_unlock_irqrestore(&iommu->lock, flags);
928         return ret;
929 }
930
931 static void free_context_table(struct intel_iommu *iommu)
932 {
933         int i;
934         unsigned long flags;
935         struct context_entry *context;
936
937         spin_lock_irqsave(&iommu->lock, flags);
938         if (!iommu->root_entry) {
939                 goto out;
940         }
941         for (i = 0; i < ROOT_ENTRY_NR; i++) {
942                 context = iommu_context_addr(iommu, i, 0, 0);
943                 if (context)
944                         free_pgtable_page(context);
945
946                 if (!ecs_enabled(iommu))
947                         continue;
948
949                 context = iommu_context_addr(iommu, i, 0x80, 0);
950                 if (context)
951                         free_pgtable_page(context);
952
953         }
954         free_pgtable_page(iommu->root_entry);
955         iommu->root_entry = NULL;
956 out:
957         spin_unlock_irqrestore(&iommu->lock, flags);
958 }
959
960 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
961                                       unsigned long pfn, int *target_level)
962 {
963         struct dma_pte *parent, *pte = NULL;
964         int level = agaw_to_level(domain->agaw);
965         int offset;
966
967         BUG_ON(!domain->pgd);
968
969         if (!domain_pfn_supported(domain, pfn))
970                 /* Address beyond IOMMU's addressing capabilities. */
971                 return NULL;
972
973         parent = domain->pgd;
974
975         while (1) {
976                 void *tmp_page;
977
978                 offset = pfn_level_offset(pfn, level);
979                 pte = &parent[offset];
980                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
981                         break;
982                 if (level == *target_level)
983                         break;
984
985                 if (!dma_pte_present(pte)) {
986                         uint64_t pteval;
987
988                         tmp_page = alloc_pgtable_page(domain->nid);
989
990                         if (!tmp_page)
991                                 return NULL;
992
993                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
994                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
995                         if (cmpxchg64(&pte->val, 0ULL, pteval))
996                                 /* Someone else set it while we were thinking; use theirs. */
997                                 free_pgtable_page(tmp_page);
998                         else
999                                 domain_flush_cache(domain, pte, sizeof(*pte));
1000                 }
1001                 if (level == 1)
1002                         break;
1003
1004                 parent = phys_to_virt(dma_pte_addr(pte));
1005                 level--;
1006         }
1007
1008         if (!*target_level)
1009                 *target_level = level;
1010
1011         return pte;
1012 }
1013
1014
1015 /* return the address's pte at a specific level */
1016 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1017                                          unsigned long pfn,
1018                                          int level, int *large_page)
1019 {
1020         struct dma_pte *parent, *pte = NULL;
1021         int total = agaw_to_level(domain->agaw);
1022         int offset;
1023
1024         parent = domain->pgd;
1025         while (level <= total) {
1026                 offset = pfn_level_offset(pfn, total);
1027                 pte = &parent[offset];
1028                 if (level == total)
1029                         return pte;
1030
1031                 if (!dma_pte_present(pte)) {
1032                         *large_page = total;
1033                         break;
1034                 }
1035
1036                 if (dma_pte_superpage(pte)) {
1037                         *large_page = total;
1038                         return pte;
1039                 }
1040
1041                 parent = phys_to_virt(dma_pte_addr(pte));
1042                 total--;
1043         }
1044         return NULL;
1045 }
1046
1047 /* clear last level pte; a tlb flush should follow */
1048 static void dma_pte_clear_range(struct dmar_domain *domain,
1049                                 unsigned long start_pfn,
1050                                 unsigned long last_pfn)
1051 {
1052         unsigned int large_page = 1;
1053         struct dma_pte *first_pte, *pte;
1054
1055         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1056         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1057         BUG_ON(start_pfn > last_pfn);
1058
1059         /* we don't need lock here; nobody else touches the iova range */
1060         do {
1061                 large_page = 1;
1062                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
1063                 if (!pte) {
1064                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
1065                         continue;
1066                 }
1067                 do {
1068                         dma_clear_pte(pte);
1069                         start_pfn += lvl_to_nr_pages(large_page);
1070                         pte++;
1071                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1072
1073                 domain_flush_cache(domain, first_pte,
1074                                    (void *)pte - (void *)first_pte);
1075
1076         } while (start_pfn && start_pfn <= last_pfn);
1077 }
1078
1079 static void dma_pte_free_level(struct dmar_domain *domain, int level,
1080                                int retain_level, struct dma_pte *pte,
1081                                unsigned long pfn, unsigned long start_pfn,
1082                                unsigned long last_pfn)
1083 {
1084         pfn = max(start_pfn, pfn);
1085         pte = &pte[pfn_level_offset(pfn, level)];
1086
1087         do {
1088                 unsigned long level_pfn;
1089                 struct dma_pte *level_pte;
1090
1091                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1092                         goto next;
1093
1094                 level_pfn = pfn & level_mask(level);
1095                 level_pte = phys_to_virt(dma_pte_addr(pte));
1096
1097                 if (level > 2) {
1098                         dma_pte_free_level(domain, level - 1, retain_level,
1099                                            level_pte, level_pfn, start_pfn,
1100                                            last_pfn);
1101                 }
1102
1103                 /*
1104                  * Free the page table if we're below the level we want to
1105                  * retain and the range covers the entire table.
1106                  */
1107                 if (level < retain_level && !(start_pfn > level_pfn ||
1108                       last_pfn < level_pfn + level_size(level) - 1)) {
1109                         dma_clear_pte(pte);
1110                         domain_flush_cache(domain, pte, sizeof(*pte));
1111                         free_pgtable_page(level_pte);
1112                 }
1113 next:
1114                 pfn += level_size(level);
1115         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1116 }
1117
1118 /*
1119  * clear last level (leaf) ptes and free page table pages below the
1120  * level we wish to keep intact.
1121  */
1122 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1123                                    unsigned long start_pfn,
1124                                    unsigned long last_pfn,
1125                                    int retain_level)
1126 {
1127         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1128         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1129         BUG_ON(start_pfn > last_pfn);
1130
1131         dma_pte_clear_range(domain, start_pfn, last_pfn);
1132
1133         /* We don't need lock here; nobody else touches the iova range */
1134         dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1135                            domain->pgd, 0, start_pfn, last_pfn);
1136
1137         /* free pgd */
1138         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1139                 free_pgtable_page(domain->pgd);
1140                 domain->pgd = NULL;
1141         }
1142 }
1143
1144 /* When a page at a given level is being unlinked from its parent, we don't
1145    need to *modify* it at all. All we need to do is make a list of all the
1146    pages which can be freed just as soon as we've flushed the IOTLB and we
1147    know the hardware page-walk will no longer touch them.
1148    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1149    be freed. */
1150 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1151                                             int level, struct dma_pte *pte,
1152                                             struct page *freelist)
1153 {
1154         struct page *pg;
1155
1156         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1157         pg->freelist = freelist;
1158         freelist = pg;
1159
1160         if (level == 1)
1161                 return freelist;
1162
1163         pte = page_address(pg);
1164         do {
1165                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1166                         freelist = dma_pte_list_pagetables(domain, level - 1,
1167                                                            pte, freelist);
1168                 pte++;
1169         } while (!first_pte_in_page(pte));
1170
1171         return freelist;
1172 }
1173
1174 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1175                                         struct dma_pte *pte, unsigned long pfn,
1176                                         unsigned long start_pfn,
1177                                         unsigned long last_pfn,
1178                                         struct page *freelist)
1179 {
1180         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1181
1182         pfn = max(start_pfn, pfn);
1183         pte = &pte[pfn_level_offset(pfn, level)];
1184
1185         do {
1186                 unsigned long level_pfn;
1187
1188                 if (!dma_pte_present(pte))
1189                         goto next;
1190
1191                 level_pfn = pfn & level_mask(level);
1192
1193                 /* If range covers entire pagetable, free it */
1194                 if (start_pfn <= level_pfn &&
1195                     last_pfn >= level_pfn + level_size(level) - 1) {
1196                         /* These subordinate page tables are going away entirely. Don't
1197                            bother to clear them; we're just going to *free* them. */
1198                         if (level > 1 && !dma_pte_superpage(pte))
1199                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1200
1201                         dma_clear_pte(pte);
1202                         if (!first_pte)
1203                                 first_pte = pte;
1204                         last_pte = pte;
1205                 } else if (level > 1) {
1206                         /* Recurse down into a level that isn't *entirely* obsolete */
1207                         freelist = dma_pte_clear_level(domain, level - 1,
1208                                                        phys_to_virt(dma_pte_addr(pte)),
1209                                                        level_pfn, start_pfn, last_pfn,
1210                                                        freelist);
1211                 }
1212 next:
1213                 pfn += level_size(level);
1214         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1215
1216         if (first_pte)
1217                 domain_flush_cache(domain, first_pte,
1218                                    (void *)++last_pte - (void *)first_pte);
1219
1220         return freelist;
1221 }
1222
1223 /* We can't just free the pages because the IOMMU may still be walking
1224    the page tables, and may have cached the intermediate levels. The
1225    pages can only be freed after the IOTLB flush has been done. */
1226 static struct page *domain_unmap(struct dmar_domain *domain,
1227                                  unsigned long start_pfn,
1228                                  unsigned long last_pfn)
1229 {
1230         struct page *freelist = NULL;
1231
1232         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1233         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1234         BUG_ON(start_pfn > last_pfn);
1235
1236         /* we don't need lock here; nobody else touches the iova range */
1237         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1238                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1239
1240         /* free pgd */
1241         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1242                 struct page *pgd_page = virt_to_page(domain->pgd);
1243                 pgd_page->freelist = freelist;
1244                 freelist = pgd_page;
1245
1246                 domain->pgd = NULL;
1247         }
1248
1249         return freelist;
1250 }
1251
1252 static void dma_free_pagelist(struct page *freelist)
1253 {
1254         struct page *pg;
1255
1256         while ((pg = freelist)) {
1257                 freelist = pg->freelist;
1258                 free_pgtable_page(page_address(pg));
1259         }
1260 }
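
/*
 * Usage sketch (illustrative, not part of the original file): callers of
 * domain_unmap() hold on to the returned freelist until the IOTLB (and,
 * where enabled, the device IOTLBs) have been invalidated, and only then
 * pass it to dma_free_pagelist(); freeing earlier could let the hardware
 * page-walk touch a page that has already been reused.
 */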
1261
1262 static void iova_entry_free(unsigned long data)
1263 {
1264         struct page *freelist = (struct page *)data;
1265
1266         dma_free_pagelist(freelist);
1267 }
1268
1269 /* iommu handling */
1270 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1271 {
1272         struct root_entry *root;
1273         unsigned long flags;
1274
1275         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1276         if (!root) {
1277                 pr_err("Allocating root entry for %s failed\n",
1278                         iommu->name);
1279                 return -ENOMEM;
1280         }
1281
1282         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1283
1284         spin_lock_irqsave(&iommu->lock, flags);
1285         iommu->root_entry = root;
1286         spin_unlock_irqrestore(&iommu->lock, flags);
1287
1288         return 0;
1289 }
1290
1291 static void iommu_set_root_entry(struct intel_iommu *iommu)
1292 {
1293         u64 addr;
1294         u32 sts;
1295         unsigned long flag;
1296
1297         addr = virt_to_phys(iommu->root_entry);
1298         if (ecs_enabled(iommu))
1299                 addr |= DMA_RTADDR_RTT;
1300
1301         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1302         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1303
1304         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1305
1306         /* Make sure the hardware completes it */
1307         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1308                       readl, (sts & DMA_GSTS_RTPS), sts);
1309
1310         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1311 }
1312
1313 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1314 {
1315         u32 val;
1316         unsigned long flag;
1317
1318         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1319                 return;
1320
1321         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1322         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1323
1324         /* Make sure the hardware completes it */
1325         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1326                       readl, (!(val & DMA_GSTS_WBFS)), val);
1327
1328         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1329 }
1330
1331 /* the return value determines whether we need a write buffer flush */
1332 static void __iommu_flush_context(struct intel_iommu *iommu,
1333                                   u16 did, u16 source_id, u8 function_mask,
1334                                   u64 type)
1335 {
1336         u64 val = 0;
1337         unsigned long flag;
1338
1339         switch (type) {
1340         case DMA_CCMD_GLOBAL_INVL:
1341                 val = DMA_CCMD_GLOBAL_INVL;
1342                 break;
1343         case DMA_CCMD_DOMAIN_INVL:
1344                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1345                 break;
1346         case DMA_CCMD_DEVICE_INVL:
1347                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1348                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1349                 break;
1350         default:
1351                 BUG();
1352         }
1353         val |= DMA_CCMD_ICC;
1354
1355         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1356         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1357
1358         /* Make sure the hardware completes it */
1359         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1360                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1361
1362         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1363 }
1364
1365 /* the return value determines whether we need a write buffer flush */
1366 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1367                                 u64 addr, unsigned int size_order, u64 type)
1368 {
1369         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1370         u64 val = 0, val_iva = 0;
1371         unsigned long flag;
1372
1373         switch (type) {
1374         case DMA_TLB_GLOBAL_FLUSH:
1375                 /* global flush doesn't need to set IVA_REG */
1376                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1377                 break;
1378         case DMA_TLB_DSI_FLUSH:
1379                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1380                 break;
1381         case DMA_TLB_PSI_FLUSH:
1382                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1383                 /* IH bit is passed in as part of address */
1384                 val_iva = size_order | addr;
1385                 break;
1386         default:
1387                 BUG();
1388         }
1389         /* Note: set drain read/write */
1390 #if 0
1391         /*
1392          * This is probably meant to be extra safe. Looks like we can
1393          * ignore it without any impact.
1394          */
1395         if (cap_read_drain(iommu->cap))
1396                 val |= DMA_TLB_READ_DRAIN;
1397 #endif
1398         if (cap_write_drain(iommu->cap))
1399                 val |= DMA_TLB_WRITE_DRAIN;
1400
1401         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1402         /* Note: Only uses first TLB reg currently */
1403         if (val_iva)
1404                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1405         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1406
1407         /* Make sure the hardware completes it */
1408         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1409                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1410
1411         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1412
1413         /* check IOTLB invalidation granularity */
1414         if (DMA_TLB_IAIG(val) == 0)
1415                 pr_err("Flush IOTLB failed\n");
1416         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1417                 pr_debug("TLB flush request %Lx, actual %Lx\n",
1418                         (unsigned long long)DMA_TLB_IIRG(type),
1419                         (unsigned long long)DMA_TLB_IAIG(val));
1420 }
1421
1422 static struct device_domain_info *
1423 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1424                          u8 bus, u8 devfn)
1425 {
1426         struct device_domain_info *info;
1427
1428         assert_spin_locked(&device_domain_lock);
1429
1430         if (!iommu->qi)
1431                 return NULL;
1432
1433         list_for_each_entry(info, &domain->devices, link)
1434                 if (info->iommu == iommu && info->bus == bus &&
1435                     info->devfn == devfn) {
1436                         if (info->ats_supported && info->dev)
1437                                 return info;
1438                         break;
1439                 }
1440
1441         return NULL;
1442 }
1443
1444 static void domain_update_iotlb(struct dmar_domain *domain)
1445 {
1446         struct device_domain_info *info;
1447         bool has_iotlb_device = false;
1448
1449         assert_spin_locked(&device_domain_lock);
1450
1451         list_for_each_entry(info, &domain->devices, link) {
1452                 struct pci_dev *pdev;
1453
1454                 if (!info->dev || !dev_is_pci(info->dev))
1455                         continue;
1456
1457                 pdev = to_pci_dev(info->dev);
1458                 if (pdev->ats_enabled) {
1459                         has_iotlb_device = true;
1460                         break;
1461                 }
1462         }
1463
1464         domain->has_iotlb_device = has_iotlb_device;
1465 }
1466
1467 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1468 {
1469         struct pci_dev *pdev;
1470
1471         assert_spin_locked(&device_domain_lock);
1472
1473         if (!info || !dev_is_pci(info->dev))
1474                 return;
1475
1476         pdev = to_pci_dev(info->dev);
1477
1478 #ifdef CONFIG_INTEL_IOMMU_SVM
1479         /* The PCIe spec, in its wisdom, declares that the behaviour of
1480            the device if you enable PASID support after ATS support is
1481            undefined. So always enable PASID support on devices which
1482            have it, even if we can't yet know if we're ever going to
1483            use it. */
1484         if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1485                 info->pasid_enabled = 1;
1486
1487         if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1488                 info->pri_enabled = 1;
1489 #endif
1490         if (info->ats_supported && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1491                 info->ats_enabled = 1;
1492                 domain_update_iotlb(info->domain);
1493                 info->ats_qdep = pci_ats_queue_depth(pdev);
1494         }
1495 }
1496
1497 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1498 {
1499         struct pci_dev *pdev;
1500
1501         assert_spin_locked(&device_domain_lock);
1502
1503         if (!dev_is_pci(info->dev))
1504                 return;
1505
1506         pdev = to_pci_dev(info->dev);
1507
1508         if (info->ats_enabled) {
1509                 pci_disable_ats(pdev);
1510                 info->ats_enabled = 0;
1511                 domain_update_iotlb(info->domain);
1512         }
1513 #ifdef CONFIG_INTEL_IOMMU_SVM
1514         if (info->pri_enabled) {
1515                 pci_disable_pri(pdev);
1516                 info->pri_enabled = 0;
1517         }
1518         if (info->pasid_enabled) {
1519                 pci_disable_pasid(pdev);
1520                 info->pasid_enabled = 0;
1521         }
1522 #endif
1523 }
1524
1525 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1526                                   u64 addr, unsigned mask)
1527 {
1528         u16 sid, qdep;
1529         unsigned long flags;
1530         struct device_domain_info *info;
1531
1532         if (!domain->has_iotlb_device)
1533                 return;
1534
1535         spin_lock_irqsave(&device_domain_lock, flags);
1536         list_for_each_entry(info, &domain->devices, link) {
1537                 if (!info->ats_enabled)
1538                         continue;
1539
1540                 sid = info->bus << 8 | info->devfn;
1541                 qdep = info->ats_qdep;
1542                 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1543         }
1544         spin_unlock_irqrestore(&device_domain_lock, flags);
1545 }
1546
1547 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1548                                   struct dmar_domain *domain,
1549                                   unsigned long pfn, unsigned int pages,
1550                                   int ih, int map)
1551 {
1552         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1553         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1554         u16 did = domain->iommu_did[iommu->seq_id];
1555
1556         BUG_ON(pages == 0);
1557
1558         if (ih)
1559                 ih = 1 << 6;
1560         /*
1561          * Fall back to a domain selective flush if there is no PSI support or
1562          * the size is too big.
1563          * PSI requires the page size to be 2 ^ x, and the base address to be
1564          * naturally aligned to the size.
1565          */
1566         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1567                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1568                                                 DMA_TLB_DSI_FLUSH);
1569         else
1570                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1571                                                 DMA_TLB_PSI_FLUSH);
1572
1573         /*
1574          * In caching mode, changes of pages from non-present to present require
1575          * flush. However, device IOTLB doesn't need to be flushed in this case.
1576          */
1577         if (!cap_caching_mode(iommu->cap) || !map)
1578                 iommu_flush_dev_iotlb(domain, addr, mask);
1579 }
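
/*
 * Worked example for the PSI maths above: flushing 4 pages at pfn 0x1000
 * gives mask = ilog2(__roundup_pow_of_two(4)) = 2 and addr = 0x1000000,
 * i.e. a naturally aligned 16KiB invalidation; a 5-page request rounds
 * up to mask = 3 (8 pages, 32KiB).
 */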
1580
1581 /* Notification for newly created mappings */
1582 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1583                                         struct dmar_domain *domain,
1584                                         unsigned long pfn, unsigned int pages)
1585 {
1586         /* It's a non-present to present mapping. Only flush if caching mode */
1587         if (cap_caching_mode(iommu->cap))
1588                 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1589         else
1590                 iommu_flush_write_buffer(iommu);
1591 }
1592
1593 static void iommu_flush_iova(struct iova_domain *iovad)
1594 {
1595         struct dmar_domain *domain;
1596         int idx;
1597
1598         domain = container_of(iovad, struct dmar_domain, iovad);
1599
1600         for_each_domain_iommu(idx, domain) {
1601                 struct intel_iommu *iommu = g_iommus[idx];
1602                 u16 did = domain->iommu_did[iommu->seq_id];
1603
1604                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1605
1606                 if (!cap_caching_mode(iommu->cap))
1607                         iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1608                                               0, MAX_AGAW_PFN_WIDTH);
1609         }
1610 }
1611
1612 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1613 {
1614         u32 pmen;
1615         unsigned long flags;
1616
1617         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1618         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1619         pmen &= ~DMA_PMEN_EPM;
1620         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1621
1622         /* wait for the protected region status bit to clear */
1623         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1624                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1625
1626         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1627 }
1628
1629 static void iommu_enable_translation(struct intel_iommu *iommu)
1630 {
1631         u32 sts;
1632         unsigned long flags;
1633
1634         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1635         iommu->gcmd |= DMA_GCMD_TE;
1636         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1637
1638         /* Make sure the hardware completes it */
1639         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1640                       readl, (sts & DMA_GSTS_TES), sts);
1641
1642         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1643 }
1644
1645 static void iommu_disable_translation(struct intel_iommu *iommu)
1646 {
1647         u32 sts;
1648         unsigned long flag;
1649
1650         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1651         iommu->gcmd &= ~DMA_GCMD_TE;
1652         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1653
1654         /* Make sure the hardware completes it */
1655         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1656                       readl, (!(sts & DMA_GSTS_TES)), sts);
1657
1658         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1659 }
1660
1661
1662 static int iommu_init_domains(struct intel_iommu *iommu)
1663 {
1664         u32 ndomains, nlongs;
1665         size_t size;
1666
1667         ndomains = cap_ndoms(iommu->cap);
1668         pr_debug("%s: Number of Domains supported <%d>\n",
1669                  iommu->name, ndomains);
1670         nlongs = BITS_TO_LONGS(ndomains);
1671
1672         spin_lock_init(&iommu->lock);
1673
1674         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1675         if (!iommu->domain_ids) {
1676                 pr_err("%s: Allocating domain id array failed\n",
1677                        iommu->name);
1678                 return -ENOMEM;
1679         }
1680
1681         size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1682         iommu->domains = kzalloc(size, GFP_KERNEL);
1683
1684         if (iommu->domains) {
1685                 size = 256 * sizeof(struct dmar_domain *);
1686                 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1687         }
1688
1689         if (!iommu->domains || !iommu->domains[0]) {
1690                 pr_err("%s: Allocating domain array failed\n",
1691                        iommu->name);
1692                 kfree(iommu->domain_ids);
1693                 kfree(iommu->domains);
1694                 iommu->domain_ids = NULL;
1695                 iommu->domains    = NULL;
1696                 return -ENOMEM;
1697         }
1698
1699
1700
1701         /*
1702          * If Caching mode is set, then invalid translations are tagged
1703          * with domain-id 0, hence we need to pre-allocate it. We also
1704          * use domain-id 0 as a marker for non-allocated domain-id, so
1705          * make sure it is not used for a real domain.
1706          */
1707         set_bit(0, iommu->domain_ids);
1708
1709         return 0;
1710 }
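
/*
 * Sizing example (64-bit kernel): with cap_ndoms() == 65536 the id bitmap
 * needs 1024 longs, and iommu->domains becomes a two-level table with 256
 * first-level slots, each lazily filled with a 256-entry array of struct
 * dmar_domain pointers; only slot 0 is allocated up front here.
 */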
1711
1712 static void disable_dmar_iommu(struct intel_iommu *iommu)
1713 {
1714         struct device_domain_info *info, *tmp;
1715         unsigned long flags;
1716
1717         if (!iommu->domains || !iommu->domain_ids)
1718                 return;
1719
1720 again:
1721         spin_lock_irqsave(&device_domain_lock, flags);
1722         list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1723                 struct dmar_domain *domain;
1724
1725                 if (info->iommu != iommu)
1726                         continue;
1727
1728                 if (!info->dev || !info->domain)
1729                         continue;
1730
1731                 domain = info->domain;
1732
1733                 __dmar_remove_one_dev_info(info);
1734
1735                 if (!domain_type_is_vm_or_si(domain)) {
1736                         /*
1737                          * The domain_exit() function can't be called under
1738                          * device_domain_lock, as it takes this lock itself.
1739                          * So release the lock here and re-run the loop
1740                          * afterwards.
1741                          */
1742                         spin_unlock_irqrestore(&device_domain_lock, flags);
1743                         domain_exit(domain);
1744                         goto again;
1745                 }
1746         }
1747         spin_unlock_irqrestore(&device_domain_lock, flags);
1748
1749         if (iommu->gcmd & DMA_GCMD_TE)
1750                 iommu_disable_translation(iommu);
1751 }
1752
1753 static void free_dmar_iommu(struct intel_iommu *iommu)
1754 {
1755         if ((iommu->domains) && (iommu->domain_ids)) {
1756                 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1757                 int i;
1758
1759                 for (i = 0; i < elems; i++)
1760                         kfree(iommu->domains[i]);
1761                 kfree(iommu->domains);
1762                 kfree(iommu->domain_ids);
1763                 iommu->domains = NULL;
1764                 iommu->domain_ids = NULL;
1765         }
1766
1767         g_iommus[iommu->seq_id] = NULL;
1768
1769         /* free context mapping */
1770         free_context_table(iommu);
1771
1772 #ifdef CONFIG_INTEL_IOMMU_SVM
1773         if (pasid_enabled(iommu)) {
1774                 if (ecap_prs(iommu->ecap))
1775                         intel_svm_finish_prq(iommu);
1776                 intel_svm_free_pasid_tables(iommu);
1777         }
1778 #endif
1779 }
1780
1781 static struct dmar_domain *alloc_domain(int flags)
1782 {
1783         struct dmar_domain *domain;
1784
1785         domain = alloc_domain_mem();
1786         if (!domain)
1787                 return NULL;
1788
1789         memset(domain, 0, sizeof(*domain));
1790         domain->nid = -1;
1791         domain->flags = flags;
1792         domain->has_iotlb_device = false;
1793         INIT_LIST_HEAD(&domain->devices);
1794
1795         return domain;
1796 }
1797
1798 /* Must be called with iommu->lock */
1799 static int domain_attach_iommu(struct dmar_domain *domain,
1800                                struct intel_iommu *iommu)
1801 {
1802         unsigned long ndomains;
1803         int num;
1804
1805         assert_spin_locked(&device_domain_lock);
1806         assert_spin_locked(&iommu->lock);
1807
1808         domain->iommu_refcnt[iommu->seq_id] += 1;
1809         domain->iommu_count += 1;
1810         if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1811                 ndomains = cap_ndoms(iommu->cap);
1812                 num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1813
1814                 if (num >= ndomains) {
1815                         pr_err("%s: No free domain ids\n", iommu->name);
1816                         domain->iommu_refcnt[iommu->seq_id] -= 1;
1817                         domain->iommu_count -= 1;
1818                         return -ENOSPC;
1819                 }
1820
1821                 set_bit(num, iommu->domain_ids);
1822                 set_iommu_domain(iommu, num, domain);
1823
1824                 domain->iommu_did[iommu->seq_id] = num;
1825                 domain->nid                      = iommu->node;
1826
1827                 domain_update_iommu_cap(domain);
1828         }
1829
1830         return 0;
1831 }
1832
1833 static int domain_detach_iommu(struct dmar_domain *domain,
1834                                struct intel_iommu *iommu)
1835 {
1836         int num, count = INT_MAX;
1837
1838         assert_spin_locked(&device_domain_lock);
1839         assert_spin_locked(&iommu->lock);
1840
1841         domain->iommu_refcnt[iommu->seq_id] -= 1;
1842         count = --domain->iommu_count;
1843         if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1844                 num = domain->iommu_did[iommu->seq_id];
1845                 clear_bit(num, iommu->domain_ids);
1846                 set_iommu_domain(iommu, num, NULL);
1847
1848                 domain_update_iommu_cap(domain);
1849                 domain->iommu_did[iommu->seq_id] = 0;
1850         }
1851
1852         return count;
1853 }
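
/*
 * Refcounting example: attaching one domain to two devices behind the
 * same IOMMU bumps iommu_refcnt[] to 2 but allocates a domain-id only on
 * the first attach; only the second detach clears the id again, and the
 * returned iommu_count tells the caller whether the domain is still
 * attached anywhere.
 */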
1854
1855 static struct iova_domain reserved_iova_list;
1856 static struct lock_class_key reserved_rbtree_key;
1857
1858 static int dmar_init_reserved_ranges(void)
1859 {
1860         struct pci_dev *pdev = NULL;
1861         struct iova *iova;
1862         int i;
1863
1864         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1865
1866         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1867                 &reserved_rbtree_key);
1868
1869         /* IOAPIC ranges shouldn't be accessed by DMA */
1870         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1871                 IOVA_PFN(IOAPIC_RANGE_END));
1872         if (!iova) {
1873                 pr_err("Reserve IOAPIC range failed\n");
1874                 return -ENODEV;
1875         }
1876
1877         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1878         for_each_pci_dev(pdev) {
1879                 struct resource *r;
1880
1881                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1882                         r = &pdev->resource[i];
1883                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1884                                 continue;
1885                         iova = reserve_iova(&reserved_iova_list,
1886                                             IOVA_PFN(r->start),
1887                                             IOVA_PFN(r->end));
1888                         if (!iova) {
1889                                 pr_err("Reserve iova failed\n");
1890                                 return -ENODEV;
1891                         }
1892                 }
1893         }
1894         return 0;
1895 }
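
/*
 * For example, the IOAPIC reservation above covers IOVA pfns
 * 0xfee00-0xfeeff (256 pages), so the allocator never hands out IOVAs
 * that would collide with the MSI/interrupt address range.
 */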
1896
1897 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1898 {
1899         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1900 }
1901
1902 static inline int guestwidth_to_adjustwidth(int gaw)
1903 {
1904         int agaw;
1905         int r = (gaw - 12) % 9;
1906
1907         if (r == 0)
1908                 agaw = gaw;
1909         else
1910                 agaw = gaw + 9 - r;
1911         if (agaw > 64)
1912                 agaw = 64;
1913         return agaw;
1914 }
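
/*
 * For example, gaw = 48 or 39 is already a multiple of the 9-bit
 * page-table stride above the 12-bit page offset and is returned
 * unchanged, while gaw = 40 gives r = 1 and rounds up to 48; anything
 * that would exceed 64 is clamped to 64.
 */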
1915
1916 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1917                        int guest_width)
1918 {
1919         int adjust_width, agaw;
1920         unsigned long sagaw;
1921         int err;
1922
1923         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1924
1925         err = init_iova_flush_queue(&domain->iovad,
1926                                     iommu_flush_iova, iova_entry_free);
1927         if (err)
1928                 return err;
1929
1930         domain_reserve_special_ranges(domain);
1931
1932         /* calculate AGAW */
1933         if (guest_width > cap_mgaw(iommu->cap))
1934                 guest_width = cap_mgaw(iommu->cap);
1935         domain->gaw = guest_width;
1936         adjust_width = guestwidth_to_adjustwidth(guest_width);
1937         agaw = width_to_agaw(adjust_width);
1938         sagaw = cap_sagaw(iommu->cap);
1939         if (!test_bit(agaw, &sagaw)) {
1940                 /* hardware doesn't support it, choose a bigger one */
1941                 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1942                 agaw = find_next_bit(&sagaw, 5, agaw);
1943                 if (agaw >= 5)
1944                         return -ENODEV;
1945         }
1946         domain->agaw = agaw;
1947
1948         if (ecap_coherent(iommu->ecap))
1949                 domain->iommu_coherency = 1;
1950         else
1951                 domain->iommu_coherency = 0;
1952
1953         if (ecap_sc_support(iommu->ecap))
1954                 domain->iommu_snooping = 1;
1955         else
1956                 domain->iommu_snooping = 0;
1957
1958         if (intel_iommu_superpage)
1959                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1960         else
1961                 domain->iommu_superpage = 0;
1962
1963         domain->nid = iommu->node;
1964
1965         /* always allocate the top pgd */
1966         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1967         if (!domain->pgd)
1968                 return -ENOMEM;
1969         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1970         return 0;
1971 }
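
/*
 * AGAW selection example: with cap_mgaw() == 48 the domain gets gaw = 48
 * and agaw = width_to_agaw(48) = 2, i.e. 4-level tables.  If the
 * hardware's SAGAW field doesn't advertise that level, find_next_bit()
 * picks the next bigger supported one (e.g. agaw = 3 for 5-level
 * tables), and -ENODEV is returned if nothing bigger exists.
 */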
1972
1973 static void domain_exit(struct dmar_domain *domain)
1974 {
1975         struct page *freelist = NULL;
1976
1977         /* Domain 0 is reserved, so don't process it */
1978         if (!domain)
1979                 return;
1980
1981         /* Remove associated devices and clear attached or cached domains */
1982         rcu_read_lock();
1983         domain_remove_dev_info(domain);
1984         rcu_read_unlock();
1985
1986         /* destroy iovas */
1987         put_iova_domain(&domain->iovad);
1988
1989         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1990
1991         dma_free_pagelist(freelist);
1992
1993         free_domain_mem(domain);
1994 }
1995
1996 static int domain_context_mapping_one(struct dmar_domain *domain,
1997                                       struct intel_iommu *iommu,
1998                                       u8 bus, u8 devfn)
1999 {
2000         u16 did = domain->iommu_did[iommu->seq_id];
2001         int translation = CONTEXT_TT_MULTI_LEVEL;
2002         struct device_domain_info *info = NULL;
2003         struct context_entry *context;
2004         unsigned long flags;
2005         struct dma_pte *pgd;
2006         int ret, agaw;
2007
2008         WARN_ON(did == 0);
2009
2010         if (hw_pass_through && domain_type_is_si(domain))
2011                 translation = CONTEXT_TT_PASS_THROUGH;
2012
2013         pr_debug("Set context mapping for %02x:%02x.%d\n",
2014                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
2015
2016         BUG_ON(!domain->pgd);
2017
2018         spin_lock_irqsave(&device_domain_lock, flags);
2019         spin_lock(&iommu->lock);
2020
2021         ret = -ENOMEM;
2022         context = iommu_context_addr(iommu, bus, devfn, 1);
2023         if (!context)
2024                 goto out_unlock;
2025
2026         ret = 0;
2027         if (context_present(context))
2028                 goto out_unlock;
2029
2030         /*
2031          * For kdump cases, old valid entries may be cached due to the
2032          * in-flight DMA and copied pgtable, but there is no unmapping
2033          * behaviour for them, thus we need an explicit cache flush for
2034          * the newly-mapped device. For kdump, at this point, the device
2035          * is supposed to finish reset at its driver probe stage, so no
2036          * in-flight DMA will exist, and we don't need to worry about it
2037          * hereafter.
2038          */
2039         if (context_copied(context)) {
2040                 u16 did_old = context_domain_id(context);
2041
2042                 if (did_old < cap_ndoms(iommu->cap)) {
2043                         iommu->flush.flush_context(iommu, did_old,
2044                                                    (((u16)bus) << 8) | devfn,
2045                                                    DMA_CCMD_MASK_NOBIT,
2046                                                    DMA_CCMD_DEVICE_INVL);
2047                         iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2048                                                  DMA_TLB_DSI_FLUSH);
2049                 }
2050         }
2051
2052         pgd = domain->pgd;
2053
2054         context_clear_entry(context);
2055         context_set_domain_id(context, did);
2056
2057         /*
2058          * Skip top levels of page tables for an IOMMU which has less agaw
2059          * than the default.  Unnecessary for PT mode.
2060          */
2061         if (translation != CONTEXT_TT_PASS_THROUGH) {
2062                 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
2063                         ret = -ENOMEM;
2064                         pgd = phys_to_virt(dma_pte_addr(pgd));
2065                         if (!dma_pte_present(pgd))
2066                                 goto out_unlock;
2067                 }
2068
2069                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2070                 if (info && info->ats_supported)
2071                         translation = CONTEXT_TT_DEV_IOTLB;
2072                 else
2073                         translation = CONTEXT_TT_MULTI_LEVEL;
2074
2075                 context_set_address_root(context, virt_to_phys(pgd));
2076                 context_set_address_width(context, iommu->agaw);
2077         } else {
2078                 /*
2079                  * In pass through mode, AW must be programmed to
2080                  * indicate the largest AGAW value supported by
2081                  * hardware. And ASR is ignored by hardware.
2082                  */
2083                 context_set_address_width(context, iommu->msagaw);
2084         }
2085
2086         context_set_translation_type(context, translation);
2087         context_set_fault_enable(context);
2088         context_set_present(context);
2089         domain_flush_cache(domain, context, sizeof(*context));
2090
2091         /*
2092          * It's a non-present to present mapping. If hardware doesn't cache
2093          * non-present entries we only need to flush the write-buffer. If it
2094          * _does_ cache non-present entries, then it does so in the special
2095          * domain #0, which we have to flush:
2096          */
2097         if (cap_caching_mode(iommu->cap)) {
2098                 iommu->flush.flush_context(iommu, 0,
2099                                            (((u16)bus) << 8) | devfn,
2100                                            DMA_CCMD_MASK_NOBIT,
2101                                            DMA_CCMD_DEVICE_INVL);
2102                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2103         } else {
2104                 iommu_flush_write_buffer(iommu);
2105         }
2106         iommu_enable_dev_iotlb(info);
2107
2108         ret = 0;
2109
2110 out_unlock:
2111         spin_unlock(&iommu->lock);
2112         spin_unlock_irqrestore(&device_domain_lock, flags);
2113
2114         return ret;
2115 }
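
/*
 * Example of the level skipping above: if the domain was built with
 * agaw = 3 (5-level tables) but this IOMMU only supports agaw = 2, the
 * loop steps down one level so that the context entry's address root
 * points at a 4-level table the hardware can actually walk.
 */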
2116
2117 struct domain_context_mapping_data {
2118         struct dmar_domain *domain;
2119         struct intel_iommu *iommu;
2120 };
2121
2122 static int domain_context_mapping_cb(struct pci_dev *pdev,
2123                                      u16 alias, void *opaque)
2124 {
2125         struct domain_context_mapping_data *data = opaque;
2126
2127         return domain_context_mapping_one(data->domain, data->iommu,
2128                                           PCI_BUS_NUM(alias), alias & 0xff);
2129 }
2130
2131 static int
2132 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2133 {
2134         struct intel_iommu *iommu;
2135         u8 bus, devfn;
2136         struct domain_context_mapping_data data;
2137
2138         iommu = device_to_iommu(dev, &bus, &devfn);
2139         if (!iommu)
2140                 return -ENODEV;
2141
2142         if (!dev_is_pci(dev))
2143                 return domain_context_mapping_one(domain, iommu, bus, devfn);
2144
2145         data.domain = domain;
2146         data.iommu = iommu;
2147
2148         return pci_for_each_dma_alias(to_pci_dev(dev),
2149                                       &domain_context_mapping_cb, &data);
2150 }
2151
2152 static int domain_context_mapped_cb(struct pci_dev *pdev,
2153                                     u16 alias, void *opaque)
2154 {
2155         struct intel_iommu *iommu = opaque;
2156
2157         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2158 }
2159
2160 static int domain_context_mapped(struct device *dev)
2161 {
2162         struct intel_iommu *iommu;
2163         u8 bus, devfn;
2164
2165         iommu = device_to_iommu(dev, &bus, &devfn);
2166         if (!iommu)
2167                 return -ENODEV;
2168
2169         if (!dev_is_pci(dev))
2170                 return device_context_mapped(iommu, bus, devfn);
2171
2172         return !pci_for_each_dma_alias(to_pci_dev(dev),
2173                                        domain_context_mapped_cb, iommu);
2174 }
2175
2176 /* Returns the number of VT-d pages, but aligned to the MM page size */
2177 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2178                                             size_t size)
2179 {
2180         host_addr &= ~PAGE_MASK;
2181         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2182 }
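
/*
 * For example (4KiB pages): a buffer starting at offset 0xc00 within a
 * page and 0x1800 bytes long covers PAGE_ALIGN(0xc00 + 0x1800) = 0x3000
 * bytes, i.e. three VT-d pages, even though the length alone would fit
 * in two.
 */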
2183
2184 /* Return the largest possible superpage level for a given mapping */
2185 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2186                                           unsigned long iov_pfn,
2187                                           unsigned long phy_pfn,
2188                                           unsigned long pages)
2189 {
2190         int support, level = 1;
2191         unsigned long pfnmerge;
2192
2193         support = domain->iommu_superpage;
2194
2195         /* To use a large page, the virtual *and* physical addresses
2196            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2197            of them will mean we have to use smaller pages. So just
2198            merge them and check both at once. */
2199         pfnmerge = iov_pfn | phy_pfn;
2200
2201         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2202                 pages >>= VTD_STRIDE_SHIFT;
2203                 if (!pages)
2204                         break;
2205                 pfnmerge >>= VTD_STRIDE_SHIFT;
2206                 level++;
2207                 support--;
2208         }
2209         return level;
2210 }
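
/*
 * Example: with domain->iommu_superpage == 2, an IOVA pfn and a physical
 * pfn that are both 2MiB aligned (low 9 bits clear) with at least 512
 * pages remaining yield level 2, i.e. one 2MiB superpage; a misaligned
 * or shorter run stays at level 1 (4KiB mappings).
 */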
2211
2212 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2213                             struct scatterlist *sg, unsigned long phys_pfn,
2214                             unsigned long nr_pages, int prot)
2215 {
2216         struct dma_pte *first_pte = NULL, *pte = NULL;
2217         phys_addr_t uninitialized_var(pteval);
2218         unsigned long sg_res = 0;
2219         unsigned int largepage_lvl = 0;
2220         unsigned long lvl_pages = 0;
2221
2222         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2223
2224         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2225                 return -EINVAL;
2226
2227         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2228
2229         if (!sg) {
2230                 sg_res = nr_pages;
2231                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2232         }
2233
2234         while (nr_pages > 0) {
2235                 uint64_t tmp;
2236
2237                 if (!sg_res) {
2238                         unsigned int pgoff = sg->offset & ~PAGE_MASK;
2239
2240                         sg_res = aligned_nrpages(sg->offset, sg->length);
2241                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2242                         sg->dma_length = sg->length;
2243                         pteval = (sg_phys(sg) - pgoff) | prot;
2244                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2245                 }
2246
2247                 if (!pte) {
2248                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2249
2250                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2251                         if (!pte)
2252                                 return -ENOMEM;
2253                         /* It is a large page */
2254                         if (largepage_lvl > 1) {
2255                                 unsigned long nr_superpages, end_pfn;
2256
2257                                 pteval |= DMA_PTE_LARGE_PAGE;
2258                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2259
2260                                 nr_superpages = sg_res / lvl_pages;
2261                                 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2262
2263                                 /*
2264                                  * Ensure that old small page tables are
2265                                  * removed to make room for superpage(s).
2266                                  * We're adding new large pages, so make sure
2267                                  * we don't remove their parent tables.
2268                                  */
2269                                 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2270                                                        largepage_lvl + 1);
2271                         } else {
2272                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2273                         }
2274
2275                 }
2276                 /* We don't need a lock here; nobody else
2277                  * touches the iova range.
2278                  */
2279                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2280                 if (tmp) {
2281                         static int dumps = 5;
2282                         pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2283                                 iov_pfn, tmp, (unsigned long long)pteval);
2284                         if (dumps) {
2285                                 dumps--;
2286                                 debug_dma_dump_mappings(NULL);
2287                         }
2288                         WARN_ON(1);
2289                 }
2290
2291                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2292
2293                 BUG_ON(nr_pages < lvl_pages);
2294                 BUG_ON(sg_res < lvl_pages);
2295
2296                 nr_pages -= lvl_pages;
2297                 iov_pfn += lvl_pages;
2298                 phys_pfn += lvl_pages;
2299                 pteval += lvl_pages * VTD_PAGE_SIZE;
2300                 sg_res -= lvl_pages;
2301
2302                 /* If the next PTE would be the first in a new page, then we
2303                    need to flush the cache on the entries we've just written.
2304                    And then we'll need to recalculate 'pte', so clear it and
2305                    let it get set again in the if (!pte) block above.
2306
2307                    If we're done (!nr_pages) we need to flush the cache too.
2308
2309                    Also if we've been setting superpages, we may need to
2310                    recalculate 'pte' and switch back to smaller pages for the
2311                    end of the mapping, if the trailing size is not enough to
2312                    use another superpage (i.e. sg_res < lvl_pages). */
2313                 pte++;
2314                 if (!nr_pages || first_pte_in_page(pte) ||
2315                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2316                         domain_flush_cache(domain, first_pte,
2317                                            (void *)pte - (void *)first_pte);
2318                         pte = NULL;
2319                 }
2320
2321                 if (!sg_res && nr_pages)
2322                         sg = sg_next(sg);
2323         }
2324         return 0;
2325 }
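
/*
 * Superpage split example: mapping 520 contiguous, 2MiB-aligned pages
 * installs one large-page PTE for the first 512 pages, then notices
 * sg_res < lvl_pages, recalculates the PTE pointer and finishes the
 * trailing 8 pages with ordinary 4KiB entries.
 */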
2326
2327 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2328                           struct scatterlist *sg, unsigned long phys_pfn,
2329                           unsigned long nr_pages, int prot)
2330 {
2331         int ret;
2332         struct intel_iommu *iommu;
2333
2334         /* Do the real mapping first */
2335         ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2336         if (ret)
2337                 return ret;
2338
2339         /* Notify about the new mapping */
2340         if (domain_type_is_vm(domain)) {
2341                 /* VM typed domains can have more than one IOMMU */
2342                 int iommu_id;
2343                 for_each_domain_iommu(iommu_id, domain) {
2344                         iommu = g_iommus[iommu_id];
2345                         __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2346                 }
2347         } else {
2348                 /* General domains only have one IOMMU */
2349                 iommu = domain_get_iommu(domain);
2350                 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2351         }
2352
2353         return 0;
2354 }
2355
2356 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2357                                     struct scatterlist *sg, unsigned long nr_pages,
2358                                     int prot)
2359 {
2360         return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2361 }
2362
2363 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2364                                      unsigned long phys_pfn, unsigned long nr_pages,
2365                                      int prot)
2366 {
2367         return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2368 }
2369
2370 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2371 {
2372         unsigned long flags;
2373         struct context_entry *context;
2374         u16 did_old;
2375
2376         if (!iommu)
2377                 return;
2378
2379         spin_lock_irqsave(&iommu->lock, flags);
2380         context = iommu_context_addr(iommu, bus, devfn, 0);
2381         if (!context) {
2382                 spin_unlock_irqrestore(&iommu->lock, flags);
2383                 return;
2384         }
2385         did_old = context_domain_id(context);
2386         context_clear_entry(context);
2387         __iommu_flush_cache(iommu, context, sizeof(*context));
2388         spin_unlock_irqrestore(&iommu->lock, flags);
2389         iommu->flush.flush_context(iommu,
2390                                    did_old,
2391                                    (((u16)bus) << 8) | devfn,
2392                                    DMA_CCMD_MASK_NOBIT,
2393                                    DMA_CCMD_DEVICE_INVL);
2394         iommu->flush.flush_iotlb(iommu,
2395                                  did_old,
2396                                  0,
2397                                  0,
2398                                  DMA_TLB_DSI_FLUSH);
2399 }
2400
2401 static inline void unlink_domain_info(struct device_domain_info *info)
2402 {
2403         assert_spin_locked(&device_domain_lock);
2404         list_del(&info->link);
2405         list_del(&info->global);
2406         if (info->dev)
2407                 info->dev->archdata.iommu = NULL;
2408 }
2409
2410 static void domain_remove_dev_info(struct dmar_domain *domain)
2411 {
2412         struct device_domain_info *info, *tmp;
2413         unsigned long flags;
2414
2415         spin_lock_irqsave(&device_domain_lock, flags);
2416         list_for_each_entry_safe(info, tmp, &domain->devices, link)
2417                 __dmar_remove_one_dev_info(info);
2418         spin_unlock_irqrestore(&device_domain_lock, flags);
2419 }
2420
2421 /*
2422  * find_domain
2423  * Note: we use struct device->archdata.iommu to store the info
2424  */
2425 static struct dmar_domain *find_domain(struct device *dev)
2426 {
2427         struct device_domain_info *info;
2428
2429         /* No lock here, assumes no domain exit in normal case */
2430         info = dev->archdata.iommu;
2431         if (likely(info))
2432                 return info->domain;
2433         return NULL;
2434 }
2435
2436 static inline struct device_domain_info *
2437 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2438 {
2439         struct device_domain_info *info;
2440
2441         list_for_each_entry(info, &device_domain_list, global)
2442                 if (info->iommu->segment == segment && info->bus == bus &&
2443                     info->devfn == devfn)
2444                         return info;
2445
2446         return NULL;
2447 }
2448
2449 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2450                                                     int bus, int devfn,
2451                                                     struct device *dev,
2452                                                     struct dmar_domain *domain)
2453 {
2454         struct dmar_domain *found = NULL;
2455         struct device_domain_info *info;
2456         unsigned long flags;
2457         int ret;
2458
2459         info = alloc_devinfo_mem();
2460         if (!info)
2461                 return NULL;
2462
2463         info->bus = bus;
2464         info->devfn = devfn;
2465         info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2466         info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2467         info->ats_qdep = 0;
2468         info->dev = dev;
2469         info->domain = domain;
2470         info->iommu = iommu;
2471
2472         if (dev && dev_is_pci(dev)) {
2473                 struct pci_dev *pdev = to_pci_dev(info->dev);
2474
2475                 if (!pci_ats_disabled() &&
2476                     ecap_dev_iotlb_support(iommu->ecap) &&
2477                     pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2478                     dmar_find_matched_atsr_unit(pdev))
2479                         info->ats_supported = 1;
2480
2481                 if (ecs_enabled(iommu)) {
2482                         if (pasid_enabled(iommu)) {
2483                                 int features = pci_pasid_features(pdev);
2484                                 if (features >= 0)
2485                                         info->pasid_supported = features | 1;
2486                         }
2487
2488                         if (info->ats_supported && ecap_prs(iommu->ecap) &&
2489                             pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2490                                 info->pri_supported = 1;
2491                 }
2492         }
2493
2494         spin_lock_irqsave(&device_domain_lock, flags);
2495         if (dev)
2496                 found = find_domain(dev);
2497
2498         if (!found) {
2499                 struct device_domain_info *info2;
2500                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2501                 if (info2) {
2502                         found      = info2->domain;
2503                         info2->dev = dev;
2504                 }
2505         }
2506
2507         if (found) {
2508                 spin_unlock_irqrestore(&device_domain_lock, flags);
2509                 free_devinfo_mem(info);
2510                 /* Caller must free the original domain */
2511                 return found;
2512         }
2513
2514         spin_lock(&iommu->lock);
2515         ret = domain_attach_iommu(domain, iommu);
2516         spin_unlock(&iommu->lock);
2517
2518         if (ret) {
2519                 spin_unlock_irqrestore(&device_domain_lock, flags);
2520                 free_devinfo_mem(info);
2521                 return NULL;
2522         }
2523
2524         list_add(&info->link, &domain->devices);
2525         list_add(&info->global, &device_domain_list);
2526         if (dev)
2527                 dev->archdata.iommu = info;
2528         spin_unlock_irqrestore(&device_domain_lock, flags);
2529
2530         if (dev && domain_context_mapping(domain, dev)) {
2531                 pr_err("Domain context map for %s failed\n", dev_name(dev));
2532                 dmar_remove_one_dev_info(domain, dev);
2533                 return NULL;
2534         }
2535
2536         return domain;
2537 }
2538
2539 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2540 {
2541         *(u16 *)opaque = alias;
2542         return 0;
2543 }
2544
2545 static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2546 {
2547         struct device_domain_info *info = NULL;
2548         struct dmar_domain *domain = NULL;
2549         struct intel_iommu *iommu;
2550         u16 dma_alias;
2551         unsigned long flags;
2552         u8 bus, devfn;
2553
2554         iommu = device_to_iommu(dev, &bus, &devfn);
2555         if (!iommu)
2556                 return NULL;
2557
2558         if (dev_is_pci(dev)) {
2559                 struct pci_dev *pdev = to_pci_dev(dev);
2560
2561                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2562
2563                 spin_lock_irqsave(&device_domain_lock, flags);
2564                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2565                                                       PCI_BUS_NUM(dma_alias),
2566                                                       dma_alias & 0xff);
2567                 if (info) {
2568                         iommu = info->iommu;
2569                         domain = info->domain;
2570                 }
2571                 spin_unlock_irqrestore(&device_domain_lock, flags);
2572
2573                 /* DMA alias already has a domain, use it */
2574                 if (info)
2575                         goto out;
2576         }
2577
2578         /* Allocate and initialize new domain for the device */
2579         domain = alloc_domain(0);
2580         if (!domain)
2581                 return NULL;
2582         if (domain_init(domain, iommu, gaw)) {
2583                 domain_exit(domain);
2584                 return NULL;
2585         }
2586
2587 out:
2588
2589         return domain;
2590 }
2591
2592 static struct dmar_domain *set_domain_for_dev(struct device *dev,
2593                                               struct dmar_domain *domain)
2594 {
2595         struct intel_iommu *iommu;
2596         struct dmar_domain *tmp;
2597         u16 req_id, dma_alias;
2598         u8 bus, devfn;
2599
2600         iommu = device_to_iommu(dev, &bus, &devfn);
2601         if (!iommu)
2602                 return NULL;
2603
2604         req_id = ((u16)bus << 8) | devfn;
2605
2606         if (dev_is_pci(dev)) {
2607                 struct pci_dev *pdev = to_pci_dev(dev);
2608
2609                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2610
2611                 /* register PCI DMA alias device */
2612                 if (req_id != dma_alias) {
2613                         tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2614                                         dma_alias & 0xff, NULL, domain);
2615
2616                         if (!tmp || tmp != domain)
2617                                 return tmp;
2618                 }
2619         }
2620
2621         tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2622         if (!tmp || tmp != domain)
2623                 return tmp;
2624
2625         return domain;
2626 }
2627
2628 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2629 {
2630         struct dmar_domain *domain, *tmp;
2631
2632         domain = find_domain(dev);
2633         if (domain)
2634                 goto out;
2635
2636         domain = find_or_alloc_domain(dev, gaw);
2637         if (!domain)
2638                 goto out;
2639
2640         tmp = set_domain_for_dev(dev, domain);
2641         if (!tmp || domain != tmp) {
2642                 domain_exit(domain);
2643                 domain = tmp;
2644         }
2645
2646 out:
2647
2648         return domain;
2649 }
2650
2651 static int iommu_domain_identity_map(struct dmar_domain *domain,
2652                                      unsigned long long start,
2653                                      unsigned long long end)
2654 {
2655         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2656         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2657
2658         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2659                           dma_to_mm_pfn(last_vpfn))) {
2660                 pr_err("Reserving iova failed\n");
2661                 return -ENOMEM;
2662         }
2663
2664         pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2665         /*
2666          * RMRR range might have overlap with physical memory range,
2667          * clear it first
2668          */
2669         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2670
2671         return __domain_mapping(domain, first_vpfn, NULL,
2672                                 first_vpfn, last_vpfn - first_vpfn + 1,
2673                                 DMA_PTE_READ|DMA_PTE_WRITE);
2674 }
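
/*
 * For example, a (hypothetical) RMRR covering 0xbf800000-0xbf87ffff is
 * first reserved in the domain's IOVA tree and then identity-mapped as
 * 128 4KiB pages (vpfn 0xbf800-0xbf87f) with read/write permission.
 */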
2675
2676 static int domain_prepare_identity_map(struct device *dev,
2677                                        struct dmar_domain *domain,
2678                                        unsigned long long start,
2679                                        unsigned long long end)
2680 {
2681         /* For _hardware_ passthrough, don't bother. But for software
2682            passthrough, we do it anyway -- it may indicate a memory
2683            range which is reserved in E820, and so didn't get set
2684            up in si_domain to start with */
2685         if (domain == si_domain && hw_pass_through) {
2686                 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2687                         dev_name(dev), start, end);
2688                 return 0;
2689         }
2690
2691         pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2692                 dev_name(dev), start, end);
2693
2694         if (end < start) {
2695                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2696                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2697                         dmi_get_system_info(DMI_BIOS_VENDOR),
2698                         dmi_get_system_info(DMI_BIOS_VERSION),
2699                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2700                 return -EIO;
2701         }
2702
2703         if (end >> agaw_to_width(domain->agaw)) {
2704                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2705                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2706                      agaw_to_width(domain->agaw),
2707                      dmi_get_system_info(DMI_BIOS_VENDOR),
2708                      dmi_get_system_info(DMI_BIOS_VERSION),
2709                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2710                 return -EIO;
2711         }
2712
2713         return iommu_domain_identity_map(domain, start, end);
2714 }
2715
2716 static int iommu_prepare_identity_map(struct device *dev,
2717                                       unsigned long long start,
2718                                       unsigned long long end)
2719 {
2720         struct dmar_domain *domain;
2721         int ret;
2722
2723         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2724         if (!domain)
2725                 return -ENOMEM;
2726
2727         ret = domain_prepare_identity_map(dev, domain, start, end);
2728         if (ret)
2729                 domain_exit(domain);
2730
2731         return ret;
2732 }
2733
2734 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2735                                          struct device *dev)
2736 {
2737         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2738                 return 0;
2739         return iommu_prepare_identity_map(dev, rmrr->base_address,
2740                                           rmrr->end_address);
2741 }
2742
2743 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2744 static inline void iommu_prepare_isa(void)
2745 {
2746         struct pci_dev *pdev;
2747         int ret;
2748
2749         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2750         if (!pdev)
2751                 return;
2752
2753         pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2754         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2755
2756         if (ret)
2757                 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2758
2759         pci_dev_put(pdev);
2760 }
2761 #else
2762 static inline void iommu_prepare_isa(void)
2763 {
2764         return;
2765 }
2766 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
2767
2768 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2769
2770 static int __init si_domain_init(int hw)
2771 {
2772         int nid, ret = 0;
2773
2774         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2775         if (!si_domain)
2776                 return -EFAULT;
2777
2778         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2779                 domain_exit(si_domain);
2780                 return -EFAULT;
2781         }
2782
2783         pr_debug("Identity mapping domain allocated\n");
2784
2785         if (hw)
2786                 return 0;
2787
2788         for_each_online_node(nid) {
2789                 unsigned long start_pfn, end_pfn;
2790                 int i;
2791
2792                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2793                         ret = iommu_domain_identity_map(si_domain,
2794                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2795                         if (ret)
2796                                 return ret;
2797                 }
2798         }
2799
2800         return 0;
2801 }
2802
2803 static int identity_mapping(struct device *dev)
2804 {
2805         struct device_domain_info *info;
2806
2807         if (likely(!iommu_identity_mapping))
2808                 return 0;
2809
2810         info = dev->archdata.iommu;
2811         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2812                 return (info->domain == si_domain);
2813
2814         return 0;
2815 }
2816
2817 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2818 {
2819         struct dmar_domain *ndomain;
2820         struct intel_iommu *iommu;
2821         u8 bus, devfn;
2822
2823         iommu = device_to_iommu(dev, &bus, &devfn);
2824         if (!iommu)
2825                 return -ENODEV;
2826
2827         ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2828         if (ndomain != domain)
2829                 return -EBUSY;
2830
2831         return 0;
2832 }
2833
2834 static bool device_has_rmrr(struct device *dev)
2835 {
2836         struct dmar_rmrr_unit *rmrr;
2837         struct device *tmp;
2838         int i;
2839
2840         rcu_read_lock();
2841         for_each_rmrr_units(rmrr) {
2842                 /*
2843                  * Return TRUE if this RMRR contains the device that
2844                  * is passed in.
2845                  */
2846                 for_each_active_dev_scope(rmrr->devices,
2847                                           rmrr->devices_cnt, i, tmp)
2848                         if (tmp == dev) {
2849                                 rcu_read_unlock();
2850                                 return true;
2851                         }
2852         }
2853         rcu_read_unlock();
2854         return false;
2855 }
2856
2857 /*
2858  * There are a couple of cases where we need to restrict the functionality of
2859  * devices associated with RMRRs.  The first is when evaluating a device for
2860  * identity mapping because problems exist when devices are moved in and out
2861  * of domains and their respective RMRR information is lost.  This means that
2862  * a device with associated RMRRs will never be in a "passthrough" domain.
2863  * The second is use of the device through the IOMMU API.  This interface
2864  * expects to have full control of the IOVA space for the device.  We cannot
2865  * satisfy both the requirement that RMRR access is maintained and have an
2866  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2867  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2868  * We therefore prevent devices associated with an RMRR from participating in
2869  * the IOMMU API, which eliminates them from device assignment.
2870  *
2871  * In both cases we assume that PCI USB devices with RMRRs have them largely
2872  * for historical reasons and that the RMRR space is not actively used post
2873  * boot.  This exclusion may change if vendors begin to abuse it.
2874  *
2875  * The same exception is made for graphics devices, with the requirement that
2876  * any use of the RMRR regions will be torn down before assigning the device
2877  * to a guest.
2878  */
2879 static bool device_is_rmrr_locked(struct device *dev)
2880 {
2881         if (!device_has_rmrr(dev))
2882                 return false;
2883
2884         if (dev_is_pci(dev)) {
2885                 struct pci_dev *pdev = to_pci_dev(dev);
2886
2887                 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2888                         return false;
2889         }
2890
2891         return true;
2892 }
2893
2894 static int iommu_should_identity_map(struct device *dev, int startup)
2895 {
2896
2897         if (dev_is_pci(dev)) {
2898                 struct pci_dev *pdev = to_pci_dev(dev);
2899
2900                 if (device_is_rmrr_locked(dev))
2901                         return 0;
2902
2903                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2904                         return 1;
2905
2906                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2907                         return 1;
2908
2909                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2910                         return 0;
2911
2912                 /*
2913                  * We want to start off with all devices in the 1:1 domain, and
2914                  * take them out later if we find they can't access all of memory.
2915                  *
2916                  * However, we can't do this for PCI devices behind bridges,
2917                  * because all PCI devices behind the same bridge will end up
2918                  * with the same source-id on their transactions.
2919                  *
2920                  * Practically speaking, we can't change things around for these
2921                  * devices at run-time, because we can't be sure there'll be no
2922                  * DMA transactions in flight for any of their siblings.
2923                  *
2924                  * So PCI devices (unless they're on the root bus) as well as
2925                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2926                  * the 1:1 domain, just in _case_ one of their siblings turns out
2927                  * not to be able to map all of memory.
2928                  */
2929                 if (!pci_is_pcie(pdev)) {
2930                         if (!pci_is_root_bus(pdev->bus))
2931                                 return 0;
2932                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2933                                 return 0;
2934                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2935                         return 0;
2936         } else {
2937                 if (device_has_rmrr(dev))
2938                         return 0;
2939         }
2940
2941         /*
2942          * At boot time, we don't yet know if devices will be 64-bit capable.
2943          * Assume that they will — if they turn out not to be, then we can
2944          * take them out of the 1:1 domain later.
2945          */
2946         if (!startup) {
2947                 /*
2948                  * If the device's dma_mask is less than the system's memory
2949                  * size then this is not a candidate for identity mapping.
2950                  */
2951                 u64 dma_mask = *dev->dma_mask;
2952
2953                 if (dev->coherent_dma_mask &&
2954                     dev->coherent_dma_mask < dma_mask)
2955                         dma_mask = dev->coherent_dma_mask;
2956
2957                 return dma_mask >= dma_get_required_mask(dev);
2958         }
2959
2960         return 1;
2961 }
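
/*
 * Example of the !startup check above: a device limited to a 32-bit DMA
 * mask on a machine with memory above 4GiB fails the
 * dma_get_required_mask() comparison and drops out of the 1:1 candidate
 * set, while a 64-bit capable device stays in.
 */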
2962
2963 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2964 {
2965         int ret;
2966
2967         if (!iommu_should_identity_map(dev, 1))
2968                 return 0;
2969
2970         ret = domain_add_dev_info(si_domain, dev);
2971         if (!ret)
2972                 pr_info("%s identity mapping for device %s\n",
2973                         hw ? "Hardware" : "Software", dev_name(dev));
2974         else if (ret == -ENODEV)
2975                 /* device not associated with an iommu */
2976                 ret = 0;
2977
2978         return ret;
2979 }
2980
2981
2982 static int __init iommu_prepare_static_identity_mapping(int hw)
2983 {
2984         struct pci_dev *pdev = NULL;
2985         struct dmar_drhd_unit *drhd;
2986         struct intel_iommu *iommu;
2987         struct device *dev;
2988         int i;
2989         int ret = 0;
2990
2991         for_each_pci_dev(pdev) {
2992                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2993                 if (ret)
2994                         return ret;
2995         }
2996
2997         for_each_active_iommu(iommu, drhd)
2998                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2999                         struct acpi_device_physical_node *pn;
3000                         struct acpi_device *adev;
3001
3002                         if (dev->bus != &acpi_bus_type)
3003                                 continue;
3004
3005                         adev = to_acpi_device(dev);
3006                         mutex_lock(&adev->physical_node_lock);
3007                         list_for_each_entry(pn, &adev->physical_node_list, node) {
3008                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3009                                 if (ret)
3010                                         break;
3011                         }
3012                         mutex_unlock(&adev->physical_node_lock);
3013                         if (ret)
3014                                 return ret;
3015                 }
3016
3017         return 0;
3018 }
3019
3020 static void intel_iommu_init_qi(struct intel_iommu *iommu)
3021 {
3022         /*
3023          * Start from a sane IOMMU hardware state.
3024          * If queued invalidation was already initialized by us
3025          * (for example, while enabling interrupt remapping), then
3026          * things are already rolling from a sane state.
3027          */
3028         if (!iommu->qi) {
3029                 /*
3030                  * Clear any previous faults.
3031                  */
3032                 dmar_fault(-1, iommu);
3033                 /*
3034                  * Disable queued invalidation if supported and already enabled
3035                  * before OS handover.
3036                  */
3037                 dmar_disable_qi(iommu);
3038         }
3039
3040         if (dmar_enable_qi(iommu)) {
3041                 /*
3042                  * Queued invalidation not enabled; use register-based invalidation
3043                  */
3044                 iommu->flush.flush_context = __iommu_flush_context;
3045                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
3046                 pr_info("%s: Using Register based invalidation\n",
3047                         iommu->name);
3048         } else {
3049                 iommu->flush.flush_context = qi_flush_context;
3050                 iommu->flush.flush_iotlb = qi_flush_iotlb;
3051                 pr_info("%s: Using Queued invalidation\n", iommu->name);
3052         }
3053 }
3054
3055 static int copy_context_table(struct intel_iommu *iommu,
3056                               struct root_entry *old_re,
3057                               struct context_entry **tbl,
3058                               int bus, bool ext)
3059 {
3060         int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3061         struct context_entry *new_ce = NULL, ce;
3062         struct context_entry *old_ce = NULL;
3063         struct root_entry re;
3064         phys_addr_t old_ce_phys;
3065
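             /*
              * In the extended format each context entry is twice as large,
              * so a bus is covered by two 4K context tables; reserve two
              * table slots per bus in that case.
              */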
3066         tbl_idx = ext ? bus * 2 : bus;
3067         memcpy(&re, old_re, sizeof(re));
3068
3069         for (devfn = 0; devfn < 256; devfn++) {
3070                 /* First calculate the correct index */
3071                 idx = (ext ? devfn * 2 : devfn) % 256;
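                     /*
                      * In extended mode the doubled index wraps at 256:
                      * devfns 0x00-0x7f fill the first context table and
                      * devfns 0x80-0xff a second one.  'pos' records how
                      * many tables have already been saved, so the final
                      * store below lands in the right slot.
                      */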
3072
3073                 if (idx == 0) {
3074                         /* First save what we may have and clean up */
3075                         if (new_ce) {
3076                                 tbl[tbl_idx] = new_ce;
3077                                 __iommu_flush_cache(iommu, new_ce,
3078                                                     VTD_PAGE_SIZE);
3079                                 pos = 1;
3080                         }
3081
3082                         if (old_ce)
3083                                 memunmap(old_ce);
3084
3085                         ret = 0;
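                             /*
                              * The lower half of the devfn range is reached
                              * via the old root entry's lower context-table
                              * pointer, the upper half via the upper pointer
                              * (only used with the extended format).
                              */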
3086                         if (devfn < 0x80)
3087                                 old_ce_phys = root_entry_lctp(&re);
3088                         else
3089                                 old_ce_phys = root_entry_uctp(&re);
3090
3091                         if (!old_ce_phys) {
3092                                 if (ext && devfn == 0) {
3093                                         /* No LCTP, try UCTP */
3094                                         devfn = 0x7f;
3095                                         continue;
3096                                 } else {
3097                                         goto out;
3098                                 }
3099                         }
3100
3101                         ret = -ENOMEM;
3102                         old_ce = memremap(old_ce_phys, PAGE_SIZE,
3103                                         MEMREMAP_WB);
3104                         if (!old_ce)
3105                                 goto out;
3106
3107                         new_ce = alloc_pgtable_page(iommu->node);
3108                         if (!new_ce)
3109                                 goto out_unmap;
3110
3111                         ret = 0;
3112                 }
3113
3114                 /* Now copy the context entry */
3115                 memcpy(&ce, old_ce + idx, sizeof(ce));
3116
3117                 if (!__context_present(&ce))
3118                         continue;
3119
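                     /*
                      * Reserve the domain id the old kernel used for this
                      * entry so that this kernel does not hand it out again
                      * while the copied context entry is still live.
                      */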
3120                 did = context_domain_id(&ce);
3121                 if (did >= 0 && did < cap_ndoms(iommu->cap))
3122                         set_bit(did, iommu->domain_ids);
3123
3124                 /*
3125                  * We need a marker for copied context entries. This
3126                  * marker needs to work for the old format as well as
3127                  * for extended context entries.
3128                  *
3129                  * Bit 67 of the context entry is used. In the old
3130                  * format this bit is available to software, in the
3131                  * extended format it is the PGE bit, but PGE is ignored
3132                  * by HW if PASIDs are disabled (and thus still
3133                  * available).
3134                  *
3135                  * So disable PASIDs first and then mark the entry
3136                  * copied. This means that we don't copy PASID
3137                  * translations from the old kernel, but this is fine as
3138                  * faults there are not fatal.
3139                  */
3140                 context_clear_pasid_enable(&ce);
3141                 context_set_copied(&ce);
3142
3143                 new_ce[idx] = ce;
3144         }
3145
3146         tbl[tbl_idx + pos] = new_ce;
3147
3148         __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3149
3150 out_unmap:
3151         memunmap(old_ce);
3152
3153 out:
3154         return ret;
3155 }
3156
3157 static int copy_translation_tables(struct intel_iommu *iommu)
3158 {
3159         struct context_entry **ctxt_tbls;
3160         struct root_entry *old_rt;
3161         phys_addr_t old_rt_phys;
3162         int ctxt_table_entries;
3163         unsigned long flags;
3164         u64 rtaddr_reg;
3165         int bus, ret;
3166         bool new_ext, ext;
3167
3168         rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3169         ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3170         new_ext    = !!ecap_ecs(iommu->ecap);
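             /*
              * 'ext' is the format the old kernel programmed (RTT bit in
              * the root-table address register); 'new_ext' is the format
              * this kernel would use, based on the extended-context
              * capability.
              */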
3171
3172         /*
3173          * The RTT bit can only be changed when translation is disabled,
3174          * but disabling translation means to open a window for data
3175          * corruption. So bail out and don't copy anything if we would
3176          * have to change the bit.
3177          */
3178         if (new_ext != ext)
3179                 return -EINVAL;
3180
3181         old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3182         if (!old_rt_phys)
3183                 return -EINVAL;
3184
3185         old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3186         if (!old_rt)
3187                 return -ENOMEM;
3188
3189         /* This is too big for the stack - allocate it from slab */
3190         ctxt_table_entries = ext ? 512 : 256;
3191         ret = -ENOMEM;
3192         ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3193         if (!ctxt_tbls)
3194                 goto out_unmap;
3195
3196         for (bus = 0; bus < 256; bus++) {
3197                 ret = copy_context_table(iommu, &old_rt[bus],
3198                                          ctxt_tbls, bus, ext);
3199                 if (ret) {
3200                         pr_err("%s: Failed to copy context table for bus %d\n",
3201                                 iommu->name, bus);
3202                         continue;
3203                 }
3204         }
3205
3206         spin_lock_irqsave(&iommu->lock, flags);
3207
3208         /* Context tables are copied, now write them to the root_entry table */
3209         for (bus = 0; bus < 256; bus++) {
3210                 int idx = ext ? bus * 2 : bus;
3211                 u64 val;
3212
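                     /*
                      * Bit 0 of each root-entry half is its present bit;
                      * set it together with the physical address of the
                      * copied context table.
                      */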
3213                 if (ctxt_tbls[idx]) {
3214                         val = virt_to_phys(ctxt_tbls[idx]) | 1;
3215                         iommu->root_entry[bus].lo = val;
3216                 }
3217
3218                 if (!ext || !ctxt_tbls[idx + 1])
3219                         continue;
3220
3221                 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3222                 iommu->root_entry[bus].hi = val;
3223         }
3224
3225         spin_unlock_irqrestore(&iommu->lock, flags);
3226
3227         kfree(ctxt_tbls);
3228
3229         __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3230
3231         ret = 0;
3232
3233 out_unmap:
3234         memunmap(old_rt);
3235
3236         return ret;
3237 }
3238
3239 static int __init init_dmars(void)
3240 {
3241         struct dmar_drhd_unit *drhd;
3242         struct dmar_rmrr_unit *rmrr;
3243         bool copied_tables = false;
3244         struct device *dev;
3245         struct intel_iommu *iommu;
3246         int i, ret;
3247
3248         /*
3249          * for each drhd
3250          *    allocate root
3251          *    initialize and program root entry to not present
3252          * endfor
3253          */
3254         for_each_drhd_unit(drhd) {
3255                 /*
3256                  * No lock needed: this is only incremented in the
3257                  * single-threaded kernel __init code path; all other
3258                  * accesses are read-only.
3259                  */
3260                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3261                         g_num_of_iommus++;
3262                         continue;
3263                 }
3264                 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3265         }
3266
3267         /* Preallocate enough resources for IOMMU hot-addition */
3268         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3269                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3270
3271         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3272                         GFP_KERNEL);
3273         if (!g_iommus) {
3274                 pr_err("Allocating global iommu array failed\n");
3275                 ret = -ENOMEM;
3276                 goto error;
3277         }
3278
3279         for_each_active_iommu(iommu, drhd) {
3280                 g_iommus[iommu->seq_id] = iommu;
3281
3282                 intel_iommu_init_qi(iommu);
3283
3284                 ret = iommu_init_domains(iommu);
3285                 if (ret)
3286                         goto free_iommu;
3287
3288                 init_translation_status(iommu);
3289
3290                 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3291                         iommu_disable_translation(iommu);
3292                         clear_translation_pre_enabled(iommu);
3293                         pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3294                                 iommu->name);
3295                 }
3296
3297                 /*
3298                  * TBD:
3299                  * we could share the same root & context tables
3300                  * among all IOMMUs. Need to split it later.
3301                  */
3302                 ret = iommu_alloc_root_entry(iommu);
3303                 if (ret)
3304                         goto free_iommu;
3305
3306                 if (translation_pre_enabled(iommu)) {
3307                         pr_info("Translation already enabled - trying to copy translation structures\n");
3308
3309                         ret = copy_translation_tables(iommu);
3310                         if (ret) {
3311                                 /*
3312                                  * We found the IOMMU with translation
3313                                  * enabled - but failed to copy over the
3314                                  * old root-entry table. Try to proceed
3315                                  * by disabling translation now and
3316                                  * allocating a clean root-entry table.
3317                                  * This might cause DMAR faults, but
3318                                  * probably the dump will still succeed.
3319                                  */
3320                                 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3321                                        iommu->name);
3322                                 iommu_disable_translation(iommu);
3323                                 clear_translation_pre_enabled(iommu);
3324                         } else {
3325                                 pr_info("Copied translation tables from previous kernel for %s\n",
3326                                         iommu->name);
3327                                 copied_tables = true;
3328                         }
3329                 }
3330
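                     /*
                      * Hardware pass-through is only usable when every
                      * IOMMU supports it; a single unit without the
                      * capability disables it globally.
                      */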
3331                 if (!ecap_pass_through(iommu->ecap))
3332                         hw_pass_through = 0;
3333 #ifdef CONFIG_INTEL_IOMMU_SVM
3334                 if (pasid_enabled(iommu))
3335                         intel_svm_alloc_pasid_tables(iommu);
3336 #endif
3337         }
3338
3339         /*
3340          * Now that qi is enabled on all iommus, set the root entry and flush
3341          * caches. This is required on some Intel X58 chipsets, otherwise the
3342          * flush_context function will loop forever and the boot hangs.
3343          */
3344         for_each_active_iommu(iommu, drhd) {
3345                 iommu_flush_write_buffer(iommu);
3346                 iommu_set_root_entry(iommu);
3347                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3348                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3349         }
3350
3351         if (iommu_pass_through)
3352                 iommu_identity_mapping |= IDENTMAP_ALL;
3353
3354 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
3355         iommu_identity_mapping |= IDENTMAP_GFX;
3356 #endif
3357
3358         check_tylersburg_isoch();
3359
3360         if (iommu_identity_mapping) {
3361                 ret = si_domain_init(hw_pass_through);
3362                 if (ret)
3363                         goto free_iommu;
3364         }
3365
3366
3367         /*
3368          * If we copied translations from a previous kernel in the kdump
3369          * case, we cannot assign the devices to domains now, as that
3370          * would eliminate the old mappings. So skip this part and defer
3371          * the assignment to device driver initialization time.
3372          */