1 /*
2  * Copyright © 2006-2014 Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * Authors: David Woodhouse <dwmw2@infradead.org>,
14  *          Ashok Raj <ashok.raj@intel.com>,
15  *          Shaohua Li <shaohua.li@intel.com>,
16  *          Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17  *          Fenghua Yu <fenghua.yu@intel.com>
18  *          Joerg Roedel <jroedel@suse.de>
19  */
20
21 #define pr_fmt(fmt)     "DMAR: " fmt
22 #define dev_fmt(fmt)    pr_fmt(fmt)
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/export.h>
28 #include <linux/slab.h>
29 #include <linux/irq.h>
30 #include <linux/interrupt.h>
31 #include <linux/spinlock.h>
32 #include <linux/pci.h>
33 #include <linux/dmar.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/mempool.h>
36 #include <linux/memory.h>
37 #include <linux/cpu.h>
38 #include <linux/timer.h>
39 #include <linux/io.h>
40 #include <linux/iova.h>
41 #include <linux/iommu.h>
42 #include <linux/intel-iommu.h>
43 #include <linux/syscore_ops.h>
44 #include <linux/tboot.h>
45 #include <linux/dmi.h>
46 #include <linux/pci-ats.h>
47 #include <linux/memblock.h>
48 #include <linux/dma-contiguous.h>
49 #include <linux/dma-direct.h>
50 #include <linux/crash_dump.h>
51 #include <linux/numa.h>
52 #include <asm/irq_remapping.h>
53 #include <asm/cacheflush.h>
54 #include <asm/iommu.h>
55
56 #include "irq_remapping.h"
57 #include "intel-pasid.h"
58
59 #define ROOT_SIZE               VTD_PAGE_SIZE
60 #define CONTEXT_SIZE            VTD_PAGE_SIZE
61
62 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
63 #define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
64 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
65 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
66
67 #define IOAPIC_RANGE_START      (0xfee00000)
68 #define IOAPIC_RANGE_END        (0xfeefffff)
69 #define IOVA_START_ADDR         (0x1000)
70
71 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 57
72
73 #define MAX_AGAW_WIDTH 64
74 #define MAX_AGAW_PFN_WIDTH      (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
75
76 #define __DOMAIN_MAX_PFN(gaw)  ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
77 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
78
79 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
80    to match. That way, we can use 'unsigned long' for PFNs with impunity. */
81 #define DOMAIN_MAX_PFN(gaw)     ((unsigned long) min_t(uint64_t, \
82                                 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
83 #define DOMAIN_MAX_ADDR(gaw)    (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
84
85 /* IO virtual address start page frame number */
86 #define IOVA_START_PFN          (1)
87
88 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
89
90 /* page table handling */
91 #define LEVEL_STRIDE            (9)
92 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
93
94 /*
95  * This bitmap is used to advertise the page sizes our hardware supports
96  * to the IOMMU core, which will then use this information to split
97  * physically contiguous memory regions it is mapping into page sizes
98  * that we support.
99  *
100  * Traditionally the IOMMU core just handed us the mappings directly,
101  * after making sure the size is a power-of-two multiple of 4KiB and
102  * that the mapping has natural alignment.
103  *
104  * To retain this behavior, we currently advertise that we support
105  * all page sizes that are a power-of-two multiple of 4KiB.
106  *
107  * If at some point we'd like to utilize the IOMMU core's new behavior,
108  * we could change this to advertise the real page sizes we support.
109  */
110 #define INTEL_IOMMU_PGSIZES     (~0xFFFUL)
111
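/*
 * AGAW (adjusted guest address width) helpers.  Each AGAW step adds one
 * page-table level of LEVEL_STRIDE (9) bits on top of the 30-bit, 2-level
 * base: agaw 1 -> 3 levels/39 bits, agaw 2 -> 4 levels/48 bits, and so on,
 * capped at MAX_AGAW_WIDTH.
 */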
112 static inline int agaw_to_level(int agaw)
113 {
114         return agaw + 2;
115 }
116
117 static inline int agaw_to_width(int agaw)
118 {
119         return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
120 }
121
122 static inline int width_to_agaw(int width)
123 {
124         return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
125 }
126
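/*
 * Page-table level helpers.  Level 1 maps 4KiB pages; each level above it
 * covers another LEVEL_STRIDE bits of the VT-d pfn, so one level-2 entry
 * spans 512 pages (2MiB), one level-3 entry spans 512^2 pages (1GiB), etc.
 * pfn_level_offset() extracts the 9-bit table index for a given level.
 */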
127 static inline unsigned int level_to_offset_bits(int level)
128 {
129         return (level - 1) * LEVEL_STRIDE;
130 }
131
132 static inline int pfn_level_offset(unsigned long pfn, int level)
133 {
134         return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
135 }
136
137 static inline unsigned long level_mask(int level)
138 {
139         return -1UL << level_to_offset_bits(level);
140 }
141
142 static inline unsigned long level_size(int level)
143 {
144         return 1UL << level_to_offset_bits(level);
145 }
146
147 static inline unsigned long align_to_level(unsigned long pfn, int level)
148 {
149         return (pfn + level_size(level) - 1) & level_mask(level);
150 }
151
152 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
153 {
154         return  1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
155 }
156
157 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
158    are never going to work. */
159 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
160 {
161         return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
162 }
163
164 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
165 {
166         return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
167 }
168 static inline unsigned long page_to_dma_pfn(struct page *pg)
169 {
170         return mm_to_dma_pfn(page_to_pfn(pg));
171 }
172 static inline unsigned long virt_to_dma_pfn(void *p)
173 {
174         return page_to_dma_pfn(virt_to_page(p));
175 }
176
177 /* global iommu list, set NULL for ignored DMAR units */
178 static struct intel_iommu **g_iommus;
179
180 static void __init check_tylersburg_isoch(void);
181 static int rwbf_quirk;
182
183 /*
184  * set to 1 to panic the kernel if VT-d cannot be enabled successfully
185  * (used when kernel is launched w/ TXT)
186  */
187 static int force_on = 0;
188 int intel_iommu_tboot_noforce;
189 static int no_platform_optin;
190
191 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
192
193 /*
194  * Take a root_entry and return the Lower Context Table Pointer (LCTP)
195  * if marked present.
196  */
197 static phys_addr_t root_entry_lctp(struct root_entry *re)
198 {
199         if (!(re->lo & 1))
200                 return 0;
201
202         return re->lo & VTD_PAGE_MASK;
203 }
204
205 /*
206  * Take a root_entry and return the Upper Context Table Pointer (UCTP)
207  * if marked present.
208  */
209 static phys_addr_t root_entry_uctp(struct root_entry *re)
210 {
211         if (!(re->hi & 1))
212                 return 0;
213
214         return re->hi & VTD_PAGE_MASK;
215 }
216
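/*
 * Accessors for the fields of a legacy context entry: lo bit 0 is the
 * present bit, bit 1 fault-processing-disable, bits 2-3 the translation
 * type, bit 11 the (extended-context) PASID-enable flag and bits 12-63 the
 * page-table root; hi bits 0-2 hold the address width and bits 8-23 the
 * domain id.  Bit 3 of hi is used by software to mark entries that were
 * copied from a previous kernel (kdump).
 */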
217 static inline void context_clear_pasid_enable(struct context_entry *context)
218 {
219         context->lo &= ~(1ULL << 11);
220 }
221
222 static inline bool context_pasid_enabled(struct context_entry *context)
223 {
224         return !!(context->lo & (1ULL << 11));
225 }
226
227 static inline void context_set_copied(struct context_entry *context)
228 {
229         context->hi |= (1ull << 3);
230 }
231
232 static inline bool context_copied(struct context_entry *context)
233 {
234         return !!(context->hi & (1ULL << 3));
235 }
236
237 static inline bool __context_present(struct context_entry *context)
238 {
239         return (context->lo & 1);
240 }
241
242 bool context_present(struct context_entry *context)
243 {
244         return context_pasid_enabled(context) ?
245              __context_present(context) :
246              __context_present(context) && !context_copied(context);
247 }
248
249 static inline void context_set_present(struct context_entry *context)
250 {
251         context->lo |= 1;
252 }
253
254 static inline void context_set_fault_enable(struct context_entry *context)
255 {
256         context->lo &= (((u64)-1) << 2) | 1;
257 }
258
259 static inline void context_set_translation_type(struct context_entry *context,
260                                                 unsigned long value)
261 {
262         context->lo &= (((u64)-1) << 4) | 3;
263         context->lo |= (value & 3) << 2;
264 }
265
266 static inline void context_set_address_root(struct context_entry *context,
267                                             unsigned long value)
268 {
269         context->lo &= ~VTD_PAGE_MASK;
270         context->lo |= value & VTD_PAGE_MASK;
271 }
272
273 static inline void context_set_address_width(struct context_entry *context,
274                                              unsigned long value)
275 {
276         context->hi |= value & 7;
277 }
278
279 static inline void context_set_domain_id(struct context_entry *context,
280                                          unsigned long value)
281 {
282         context->hi |= (value & ((1 << 16) - 1)) << 8;
283 }
284
285 static inline int context_domain_id(struct context_entry *c)
286 {
287         return((c->hi >> 8) & 0xffff);
288 }
289
290 static inline void context_clear_entry(struct context_entry *context)
291 {
292         context->lo = 0;
293         context->hi = 0;
294 }
295
296 /*
297  * This domain is a static identity mapping domain.
298  *      1. This domain creates a static 1:1 mapping to all usable memory.
299  *      2. It maps to each iommu if successful.
300  *      3. Each iommu maps to this domain if successful.
301  */
302 static struct dmar_domain *si_domain;
303 static int hw_pass_through = 1;
304
305 /*
306  * Domain represents a virtual machine; more than one device
307  * across iommus may be owned by one domain, e.g. a kvm guest.
308  */
309 #define DOMAIN_FLAG_VIRTUAL_MACHINE     (1 << 0)
310
311 /* si_domain contains multiple devices */
312 #define DOMAIN_FLAG_STATIC_IDENTITY     (1 << 1)
313
314 #define for_each_domain_iommu(idx, domain)                      \
315         for (idx = 0; idx < g_num_of_iommus; idx++)             \
316                 if (domain->iommu_refcnt[idx])
317
318 struct dmar_rmrr_unit {
319         struct list_head list;          /* list of rmrr units   */
320         struct acpi_dmar_header *hdr;   /* ACPI header          */
321         u64     base_address;           /* reserved base address*/
322         u64     end_address;            /* reserved end address */
323         struct dmar_dev_scope *devices; /* target devices */
324         int     devices_cnt;            /* target device count */
325         struct iommu_resv_region *resv; /* reserved region handle */
326 };
327
328 struct dmar_atsr_unit {
329         struct list_head list;          /* list of ATSR units */
330         struct acpi_dmar_header *hdr;   /* ACPI header */
331         struct dmar_dev_scope *devices; /* target devices */
332         int devices_cnt;                /* target device count */
333         u8 include_all:1;               /* include all ports */
334 };
335
336 static LIST_HEAD(dmar_atsr_units);
337 static LIST_HEAD(dmar_rmrr_units);
338
339 #define for_each_rmrr_units(rmrr) \
340         list_for_each_entry(rmrr, &dmar_rmrr_units, list)
341
342 /* number of registered intel_iommus, used to size and index g_iommus */
343 static int g_num_of_iommus;
344
345 static void domain_exit(struct dmar_domain *domain);
346 static void domain_remove_dev_info(struct dmar_domain *domain);
347 static void dmar_remove_one_dev_info(struct device *dev);
348 static void __dmar_remove_one_dev_info(struct device_domain_info *info);
349 static void domain_context_clear(struct intel_iommu *iommu,
350                                  struct device *dev);
351 static int domain_detach_iommu(struct dmar_domain *domain,
352                                struct intel_iommu *iommu);
353
354 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
355 int dmar_disabled = 0;
356 #else
357 int dmar_disabled = 1;
358 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
359
360 int intel_iommu_enabled = 0;
361 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
362
363 static int dmar_map_gfx = 1;
364 static int dmar_forcedac;
365 static int intel_iommu_strict;
366 static int intel_iommu_superpage = 1;
367 static int intel_iommu_sm;
368 static int iommu_identity_mapping;
369
370 #define IDENTMAP_ALL            1
371 #define IDENTMAP_GFX            2
372 #define IDENTMAP_AZALIA         4
373
374 #define sm_supported(iommu)     (intel_iommu_sm && ecap_smts((iommu)->ecap))
375 #define pasid_supported(iommu)  (sm_supported(iommu) &&                 \
376                                  ecap_pasid((iommu)->ecap))
377
378 int intel_iommu_gfx_mapped;
379 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
380
381 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
382 static DEFINE_SPINLOCK(device_domain_lock);
383 static LIST_HEAD(device_domain_list);
384
385 /*
386  * Iterate over elements in device_domain_list and call the specified
387  * callback @fn against each element.
388  */
389 int for_each_device_domain(int (*fn)(struct device_domain_info *info,
390                                      void *data), void *data)
391 {
392         int ret = 0;
393         unsigned long flags;
394         struct device_domain_info *info;
395
396         spin_lock_irqsave(&device_domain_lock, flags);
397         list_for_each_entry(info, &device_domain_list, global) {
398                 ret = fn(info, data);
399                 if (ret) {
400                         spin_unlock_irqrestore(&device_domain_lock, flags);
401                         return ret;
402                 }
403         }
404         spin_unlock_irqrestore(&device_domain_lock, flags);
405
406         return 0;
407 }
408
409 const struct iommu_ops intel_iommu_ops;
410
411 static bool translation_pre_enabled(struct intel_iommu *iommu)
412 {
413         return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
414 }
415
416 static void clear_translation_pre_enabled(struct intel_iommu *iommu)
417 {
418         iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
419 }
420
421 static void init_translation_status(struct intel_iommu *iommu)
422 {
423         u32 gsts;
424
425         gsts = readl(iommu->reg + DMAR_GSTS_REG);
426         if (gsts & DMA_GSTS_TES)
427                 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
428 }
429
430 /* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
431 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
432 {
433         return container_of(dom, struct dmar_domain, domain);
434 }
435
436 static int __init intel_iommu_setup(char *str)
437 {
438         if (!str)
439                 return -EINVAL;
440         while (*str) {
441                 if (!strncmp(str, "on", 2)) {
442                         dmar_disabled = 0;
443                         pr_info("IOMMU enabled\n");
444                 } else if (!strncmp(str, "off", 3)) {
445                         dmar_disabled = 1;
446                         no_platform_optin = 1;
447                         pr_info("IOMMU disabled\n");
448                 } else if (!strncmp(str, "igfx_off", 8)) {
449                         dmar_map_gfx = 0;
450                         pr_info("Disable GFX device mapping\n");
451                 } else if (!strncmp(str, "forcedac", 8)) {
452                         pr_info("Forcing DAC for PCI devices\n");
453                         dmar_forcedac = 1;
454                 } else if (!strncmp(str, "strict", 6)) {
455                         pr_info("Disable batched IOTLB flush\n");
456                         intel_iommu_strict = 1;
457                 } else if (!strncmp(str, "sp_off", 6)) {
458                         pr_info("Disable supported super page\n");
459                         intel_iommu_superpage = 0;
460                 } else if (!strncmp(str, "sm_on", 5)) {
461                         pr_info("Intel-IOMMU: scalable mode supported\n");
462                         intel_iommu_sm = 1;
463                 } else if (!strncmp(str, "tboot_noforce", 13)) {
464                         printk(KERN_INFO
465                                 "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n");
466                         intel_iommu_tboot_noforce = 1;
467                 }
468
469                 str += strcspn(str, ",");
470                 while (*str == ',')
471                         str++;
472         }
473         return 0;
474 }
475 __setup("intel_iommu=", intel_iommu_setup);
476
477 static struct kmem_cache *iommu_domain_cache;
478 static struct kmem_cache *iommu_devinfo_cache;
479
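/*
 * Per-IOMMU domain-id to dmar_domain lookup.  iommu->domains is a two-level
 * table indexed by (did >> 8) and (did & 0xff); the 256-entry second-level
 * chunks are allocated lazily in set_iommu_domain().
 */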
480 static struct dmar_domain *get_iommu_domain(struct intel_iommu *iommu, u16 did)
481 {
482         struct dmar_domain **domains;
483         int idx = did >> 8;
484
485         domains = iommu->domains[idx];
486         if (!domains)
487                 return NULL;
488
489         return domains[did & 0xff];
490 }
491
492 static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
493                              struct dmar_domain *domain)
494 {
495         struct dmar_domain **domains;
496         int idx = did >> 8;
497
498         if (!iommu->domains[idx]) {
499                 size_t size = 256 * sizeof(struct dmar_domain *);
500                 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
501         }
502
503         domains = iommu->domains[idx];
504         if (WARN_ON(!domains))
505                 return;
506         else
507                 domains[did & 0xff] = domain;
508 }
509
510 void *alloc_pgtable_page(int node)
511 {
512         struct page *page;
513         void *vaddr = NULL;
514
515         page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
516         if (page)
517                 vaddr = page_address(page);
518         return vaddr;
519 }
520
521 void free_pgtable_page(void *vaddr)
522 {
523         free_page((unsigned long)vaddr);
524 }
525
526 static inline void *alloc_domain_mem(void)
527 {
528         return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
529 }
530
531 static void free_domain_mem(void *vaddr)
532 {
533         kmem_cache_free(iommu_domain_cache, vaddr);
534 }
535
536 static inline void *alloc_devinfo_mem(void)
537 {
538         return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
539 }
540
541 static inline void free_devinfo_mem(void *vaddr)
542 {
543         kmem_cache_free(iommu_devinfo_cache, vaddr);
544 }
545
546 static inline int domain_type_is_vm(struct dmar_domain *domain)
547 {
548         return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
549 }
550
551 static inline int domain_type_is_si(struct dmar_domain *domain)
552 {
553         return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
554 }
555
556 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
557 {
558         return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
559                                 DOMAIN_FLAG_STATIC_IDENTITY);
560 }
561
562 static inline int domain_pfn_supported(struct dmar_domain *domain,
563                                        unsigned long pfn)
564 {
565         int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
566
567         return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
568 }
569
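/*
 * Pick the largest AGAW that is both advertised in the SAGAW capability
 * field and no wider than the requested max_gaw; returns -1 if none fits.
 */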
570 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
571 {
572         unsigned long sagaw;
573         int agaw = -1;
574
575         sagaw = cap_sagaw(iommu->cap);
576         for (agaw = width_to_agaw(max_gaw);
577              agaw >= 0; agaw--) {
578                 if (test_bit(agaw, &sagaw))
579                         break;
580         }
581
582         return agaw;
583 }
584
585 /*
586  * Calculate max SAGAW for each iommu.
587  */
588 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
589 {
590         return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
591 }
592
593 /*
594  * Calculate agaw for each iommu.
595  * "SAGAW" may be different across iommus; use a default agaw, and
596  * fall back to a lesser supported agaw for iommus that don't support the default.
597  */
598 int iommu_calculate_agaw(struct intel_iommu *iommu)
599 {
600         return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
601 }
602
603 /* This function only returns a single iommu in a domain */
604 struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
605 {
606         int iommu_id;
607
608         /* si_domain and vm domain should not get here. */
609         BUG_ON(domain_type_is_vm_or_si(domain));
610         for_each_domain_iommu(iommu_id, domain)
611                 break;
612
613         if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
614                 return NULL;
615
616         return g_iommus[iommu_id];
617 }
618
619 static void domain_update_iommu_coherency(struct dmar_domain *domain)
620 {
621         struct dmar_drhd_unit *drhd;
622         struct intel_iommu *iommu;
623         bool found = false;
624         int i;
625
626         domain->iommu_coherency = 1;
627
628         for_each_domain_iommu(i, domain) {
629                 found = true;
630                 if (!ecap_coherent(g_iommus[i]->ecap)) {
631                         domain->iommu_coherency = 0;
632                         break;
633                 }
634         }
635         if (found)
636                 return;
637
638         /* No hardware attached; use lowest common denominator */
639         rcu_read_lock();
640         for_each_active_iommu(iommu, drhd) {
641                 if (!ecap_coherent(iommu->ecap)) {
642                         domain->iommu_coherency = 0;
643                         break;
644                 }
645         }
646         rcu_read_unlock();
647 }
648
649 static int domain_update_iommu_snooping(struct intel_iommu *skip)
650 {
651         struct dmar_drhd_unit *drhd;
652         struct intel_iommu *iommu;
653         int ret = 1;
654
655         rcu_read_lock();
656         for_each_active_iommu(iommu, drhd) {
657                 if (iommu != skip) {
658                         if (!ecap_sc_support(iommu->ecap)) {
659                                 ret = 0;
660                                 break;
661                         }
662                 }
663         }
664         rcu_read_unlock();
665
666         return ret;
667 }
668
669 static int domain_update_iommu_superpage(struct intel_iommu *skip)
670 {
671         struct dmar_drhd_unit *drhd;
672         struct intel_iommu *iommu;
673         int mask = 0xf;
674
675         if (!intel_iommu_superpage) {
676                 return 0;
677         }
678
679         /* set iommu_superpage to the smallest common denominator */
680         rcu_read_lock();
681         for_each_active_iommu(iommu, drhd) {
682                 if (iommu != skip) {
683                         mask &= cap_super_page_val(iommu->cap);
684                         if (!mask)
685                                 break;
686                 }
687         }
688         rcu_read_unlock();
689
690         return fls(mask);
691 }
692
693 /* Some capabilities may be different across iommus */
694 static void domain_update_iommu_cap(struct dmar_domain *domain)
695 {
696         domain_update_iommu_coherency(domain);
697         domain->iommu_snooping = domain_update_iommu_snooping(NULL);
698         domain->iommu_superpage = domain_update_iommu_superpage(NULL);
699 }
700
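/*
 * Return (optionally allocating) the context entry for bus/devfn.  In
 * scalable mode the context entries are twice the legacy size, so each
 * 4KiB table only covers 128 devfns: devfn 0-127 hangs off root->lo,
 * devfn 128-255 off root->hi, and the index is doubled to step over the
 * wider entries.
 */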
701 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
702                                          u8 devfn, int alloc)
703 {
704         struct root_entry *root = &iommu->root_entry[bus];
705         struct context_entry *context;
706         u64 *entry;
707
708         entry = &root->lo;
709         if (sm_supported(iommu)) {
710                 if (devfn >= 0x80) {
711                         devfn -= 0x80;
712                         entry = &root->hi;
713                 }
714                 devfn *= 2;
715         }
716         if (*entry & 1)
717                 context = phys_to_virt(*entry & VTD_PAGE_MASK);
718         else {
719                 unsigned long phy_addr;
720                 if (!alloc)
721                         return NULL;
722
723                 context = alloc_pgtable_page(iommu->node);
724                 if (!context)
725                         return NULL;
726
727                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
728                 phy_addr = virt_to_phys((void *)context);
729                 *entry = phy_addr | 1;
730                 __iommu_flush_cache(iommu, entry, sizeof(*entry));
731         }
732         return &context[devfn];
733 }
734
735 static int iommu_dummy(struct device *dev)
736 {
737         return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
738 }
739
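/*
 * Map a device to the IOMMU (DRHD unit) whose scope covers it and report
 * the source-id bus/devfn to use for it.  VFs are looked up via their PF,
 * devices behind a bridge via the bridge's subordinate bus range, and an
 * include_all unit catches any remaining PCI device in the segment.
 */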
740 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
741 {
742         struct dmar_drhd_unit *drhd = NULL;
743         struct intel_iommu *iommu;
744         struct device *tmp;
745         struct pci_dev *ptmp, *pdev = NULL;
746         u16 segment = 0;
747         int i;
748
749         if (iommu_dummy(dev))
750                 return NULL;
751
752         if (dev_is_pci(dev)) {
753                 struct pci_dev *pf_pdev;
754
755                 pdev = to_pci_dev(dev);
756
757 #ifdef CONFIG_X86
758                 /* VMD child devices currently cannot be handled individually */
759                 if (is_vmd(pdev->bus))
760                         return NULL;
761 #endif
762
763                 /* VFs aren't listed in scope tables; we need to look up
764                  * the PF instead to find the IOMMU. */
765                 pf_pdev = pci_physfn(pdev);
766                 dev = &pf_pdev->dev;
767                 segment = pci_domain_nr(pdev->bus);
768         } else if (has_acpi_companion(dev))
769                 dev = &ACPI_COMPANION(dev)->dev;
770
771         rcu_read_lock();
772         for_each_active_iommu(iommu, drhd) {
773                 if (pdev && segment != drhd->segment)
774                         continue;
775
776                 for_each_active_dev_scope(drhd->devices,
777                                           drhd->devices_cnt, i, tmp) {
778                         if (tmp == dev) {
779                                 /* For a VF use its original BDF# not that of the PF
780                                  * which we used for the IOMMU lookup. Strictly speaking
781                                  * we could do this for all PCI devices; we only need to
782                                  * get the BDF# from the scope table for ACPI matches. */
783                                 if (pdev && pdev->is_virtfn)
784                                         goto got_pdev;
785
786                                 *bus = drhd->devices[i].bus;
787                                 *devfn = drhd->devices[i].devfn;
788                                 goto out;
789                         }
790
791                         if (!pdev || !dev_is_pci(tmp))
792                                 continue;
793
794                         ptmp = to_pci_dev(tmp);
795                         if (ptmp->subordinate &&
796                             ptmp->subordinate->number <= pdev->bus->number &&
797                             ptmp->subordinate->busn_res.end >= pdev->bus->number)
798                                 goto got_pdev;
799                 }
800
801                 if (pdev && drhd->include_all) {
802                 got_pdev:
803                         *bus = pdev->bus->number;
804                         *devfn = pdev->devfn;
805                         goto out;
806                 }
807         }
808         iommu = NULL;
809  out:
810         rcu_read_unlock();
811
812         return iommu;
813 }
814
815 static void domain_flush_cache(struct dmar_domain *domain,
816                                void *addr, int size)
817 {
818         if (!domain->iommu_coherency)
819                 clflush_cache_range(addr, size);
820 }
821
822 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
823 {
824         struct context_entry *context;
825         int ret = 0;
826         unsigned long flags;
827
828         spin_lock_irqsave(&iommu->lock, flags);
829         context = iommu_context_addr(iommu, bus, devfn, 0);
830         if (context)
831                 ret = context_present(context);
832         spin_unlock_irqrestore(&iommu->lock, flags);
833         return ret;
834 }
835
836 static void free_context_table(struct intel_iommu *iommu)
837 {
838         int i;
839         unsigned long flags;
840         struct context_entry *context;
841
842         spin_lock_irqsave(&iommu->lock, flags);
843         if (!iommu->root_entry) {
844                 goto out;
845         }
846         for (i = 0; i < ROOT_ENTRY_NR; i++) {
847                 context = iommu_context_addr(iommu, i, 0, 0);
848                 if (context)
849                         free_pgtable_page(context);
850
851                 if (!sm_supported(iommu))
852                         continue;
853
854                 context = iommu_context_addr(iommu, i, 0x80, 0);
855                 if (context)
856                         free_pgtable_page(context);
857
858         }
859         free_pgtable_page(iommu->root_entry);
860         iommu->root_entry = NULL;
861 out:
862         spin_unlock_irqrestore(&iommu->lock, flags);
863 }
864
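/*
 * Walk the page table down to *target_level and return the PTE for @pfn,
 * allocating intermediate tables on the way.  A *target_level of 0 means
 * "just look": the walk stops at the first superpage or non-present entry
 * and *target_level is set to the level actually reached.
 */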
865 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
866                                       unsigned long pfn, int *target_level)
867 {
868         struct dma_pte *parent, *pte;
869         int level = agaw_to_level(domain->agaw);
870         int offset;
871
872         BUG_ON(!domain->pgd);
873
874         if (!domain_pfn_supported(domain, pfn))
875                 /* Address beyond IOMMU's addressing capabilities. */
876                 return NULL;
877
878         parent = domain->pgd;
879
880         while (1) {
881                 void *tmp_page;
882
883                 offset = pfn_level_offset(pfn, level);
884                 pte = &parent[offset];
885                 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
886                         break;
887                 if (level == *target_level)
888                         break;
889
890                 if (!dma_pte_present(pte)) {
891                         uint64_t pteval;
892
893                         tmp_page = alloc_pgtable_page(domain->nid);
894
895                         if (!tmp_page)
896                                 return NULL;
897
898                         domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
899                         pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
900                         if (cmpxchg64(&pte->val, 0ULL, pteval))
901                                 /* Someone else set it while we were thinking; use theirs. */
902                                 free_pgtable_page(tmp_page);
903                         else
904                                 domain_flush_cache(domain, pte, sizeof(*pte));
905                 }
906                 if (level == 1)
907                         break;
908
909                 parent = phys_to_virt(dma_pte_addr(pte));
910                 level--;
911         }
912
913         if (!*target_level)
914                 *target_level = level;
915
916         return pte;
917 }
918
919
920 /* return the address's pte at a specific level */
921 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
922                                          unsigned long pfn,
923                                          int level, int *large_page)
924 {
925         struct dma_pte *parent, *pte;
926         int total = agaw_to_level(domain->agaw);
927         int offset;
928
929         parent = domain->pgd;
930         while (level <= total) {
931                 offset = pfn_level_offset(pfn, total);
932                 pte = &parent[offset];
933                 if (level == total)
934                         return pte;
935
936                 if (!dma_pte_present(pte)) {
937                         *large_page = total;
938                         break;
939                 }
940
941                 if (dma_pte_superpage(pte)) {
942                         *large_page = total;
943                         return pte;
944                 }
945
946                 parent = phys_to_virt(dma_pte_addr(pte));
947                 total--;
948         }
949         return NULL;
950 }
951
952 /* clear last level ptes; a tlb flush should follow */
953 static void dma_pte_clear_range(struct dmar_domain *domain,
954                                 unsigned long start_pfn,
955                                 unsigned long last_pfn)
956 {
957         unsigned int large_page;
958         struct dma_pte *first_pte, *pte;
959
960         BUG_ON(!domain_pfn_supported(domain, start_pfn));
961         BUG_ON(!domain_pfn_supported(domain, last_pfn));
962         BUG_ON(start_pfn > last_pfn);
963
964         /* we don't need lock here; nobody else touches the iova range */
965         do {
966                 large_page = 1;
967                 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
968                 if (!pte) {
969                         start_pfn = align_to_level(start_pfn + 1, large_page + 1);
970                         continue;
971                 }
972                 do {
973                         dma_clear_pte(pte);
974                         start_pfn += lvl_to_nr_pages(large_page);
975                         pte++;
976                 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
977
978                 domain_flush_cache(domain, first_pte,
979                                    (void *)pte - (void *)first_pte);
980
981         } while (start_pfn && start_pfn <= last_pfn);
982 }
983
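/*
 * Recurse down from @level and free page-table pages that lie below
 * @retain_level and are entirely covered by [@start_pfn, @last_pfn].
 * Leaf PTEs themselves are cleared beforehand by dma_pte_clear_range().
 */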
984 static void dma_pte_free_level(struct dmar_domain *domain, int level,
985                                int retain_level, struct dma_pte *pte,
986                                unsigned long pfn, unsigned long start_pfn,
987                                unsigned long last_pfn)
988 {
989         pfn = max(start_pfn, pfn);
990         pte = &pte[pfn_level_offset(pfn, level)];
991
992         do {
993                 unsigned long level_pfn;
994                 struct dma_pte *level_pte;
995
996                 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
997                         goto next;
998
999                 level_pfn = pfn & level_mask(level);
1000                 level_pte = phys_to_virt(dma_pte_addr(pte));
1001
1002                 if (level > 2) {
1003                         dma_pte_free_level(domain, level - 1, retain_level,
1004                                            level_pte, level_pfn, start_pfn,
1005                                            last_pfn);
1006                 }
1007
1008                 /*
1009                  * Free the page table if we're below the level we want to
1010                  * retain and the range covers the entire table.
1011                  */
1012                 if (level < retain_level && !(start_pfn > level_pfn ||
1013                       last_pfn < level_pfn + level_size(level) - 1)) {
1014                         dma_clear_pte(pte);
1015                         domain_flush_cache(domain, pte, sizeof(*pte));
1016                         free_pgtable_page(level_pte);
1017                 }
1018 next:
1019                 pfn += level_size(level);
1020         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1021 }
1022
1023 /*
1024  * clear last level (leaf) ptes and free page table pages below the
1025  * level we wish to keep intact.
1026  */
1027 static void dma_pte_free_pagetable(struct dmar_domain *domain,
1028                                    unsigned long start_pfn,
1029                                    unsigned long last_pfn,
1030                                    int retain_level)
1031 {
1032         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1033         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1034         BUG_ON(start_pfn > last_pfn);
1035
1036         dma_pte_clear_range(domain, start_pfn, last_pfn);
1037
1038         /* We don't need lock here; nobody else touches the iova range */
1039         dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
1040                            domain->pgd, 0, start_pfn, last_pfn);
1041
1042         /* free pgd */
1043         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1044                 free_pgtable_page(domain->pgd);
1045                 domain->pgd = NULL;
1046         }
1047 }
1048
1049 /* When a page at a given level is being unlinked from its parent, we don't
1050    need to *modify* it at all. All we need to do is make a list of all the
1051    pages which can be freed just as soon as we've flushed the IOTLB and we
1052    know the hardware page-walk will no longer touch them.
1053    The 'pte' argument is the *parent* PTE, pointing to the page that is to
1054    be freed. */
1055 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1056                                             int level, struct dma_pte *pte,
1057                                             struct page *freelist)
1058 {
1059         struct page *pg;
1060
1061         pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1062         pg->freelist = freelist;
1063         freelist = pg;
1064
1065         if (level == 1)
1066                 return freelist;
1067
1068         pte = page_address(pg);
1069         do {
1070                 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1071                         freelist = dma_pte_list_pagetables(domain, level - 1,
1072                                                            pte, freelist);
1073                 pte++;
1074         } while (!first_pte_in_page(pte));
1075
1076         return freelist;
1077 }
1078
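/*
 * Clear the PTEs covering [@start_pfn, @last_pfn] at this level.  Tables
 * that the range covers completely are unlinked and chained (via
 * pg->freelist) onto @freelist for freeing after the IOTLB flush;
 * partially covered tables are recursed into instead.
 */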
1079 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1080                                         struct dma_pte *pte, unsigned long pfn,
1081                                         unsigned long start_pfn,
1082                                         unsigned long last_pfn,
1083                                         struct page *freelist)
1084 {
1085         struct dma_pte *first_pte = NULL, *last_pte = NULL;
1086
1087         pfn = max(start_pfn, pfn);
1088         pte = &pte[pfn_level_offset(pfn, level)];
1089
1090         do {
1091                 unsigned long level_pfn;
1092
1093                 if (!dma_pte_present(pte))
1094                         goto next;
1095
1096                 level_pfn = pfn & level_mask(level);
1097
1098                 /* If range covers entire pagetable, free it */
1099                 if (start_pfn <= level_pfn &&
1100                     last_pfn >= level_pfn + level_size(level) - 1) {
1101                         /* These subordinate page tables are going away entirely. Don't
1102                            bother to clear them; we're just going to *free* them. */
1103                         if (level > 1 && !dma_pte_superpage(pte))
1104                                 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1105
1106                         dma_clear_pte(pte);
1107                         if (!first_pte)
1108                                 first_pte = pte;
1109                         last_pte = pte;
1110                 } else if (level > 1) {
1111                         /* Recurse down into a level that isn't *entirely* obsolete */
1112                         freelist = dma_pte_clear_level(domain, level - 1,
1113                                                        phys_to_virt(dma_pte_addr(pte)),
1114                                                        level_pfn, start_pfn, last_pfn,
1115                                                        freelist);
1116                 }
1117 next:
1118                 pfn += level_size(level);
1119         } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1120
1121         if (first_pte)
1122                 domain_flush_cache(domain, first_pte,
1123                                    (void *)++last_pte - (void *)first_pte);
1124
1125         return freelist;
1126 }
1127
1128 /* We can't just free the pages because the IOMMU may still be walking
1129    the page tables, and may have cached the intermediate levels. The
1130    pages can only be freed after the IOTLB flush has been done. */
1131 static struct page *domain_unmap(struct dmar_domain *domain,
1132                                  unsigned long start_pfn,
1133                                  unsigned long last_pfn)
1134 {
1135         struct page *freelist;
1136
1137         BUG_ON(!domain_pfn_supported(domain, start_pfn));
1138         BUG_ON(!domain_pfn_supported(domain, last_pfn));
1139         BUG_ON(start_pfn > last_pfn);
1140
1141         /* we don't need lock here; nobody else touches the iova range */
1142         freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1143                                        domain->pgd, 0, start_pfn, last_pfn, NULL);
1144
1145         /* free pgd */
1146         if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1147                 struct page *pgd_page = virt_to_page(domain->pgd);
1148                 pgd_page->freelist = freelist;
1149                 freelist = pgd_page;
1150
1151                 domain->pgd = NULL;
1152         }
1153
1154         return freelist;
1155 }
1156
1157 static void dma_free_pagelist(struct page *freelist)
1158 {
1159         struct page *pg;
1160
1161         while ((pg = freelist)) {
1162                 freelist = pg->freelist;
1163                 free_pgtable_page(page_address(pg));
1164         }
1165 }
1166
1167 static void iova_entry_free(unsigned long data)
1168 {
1169         struct page *freelist = (struct page *)data;
1170
1171         dma_free_pagelist(freelist);
1172 }
1173
1174 /* iommu handling */
1175 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1176 {
1177         struct root_entry *root;
1178         unsigned long flags;
1179
1180         root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1181         if (!root) {
1182                 pr_err("Allocating root entry for %s failed\n",
1183                         iommu->name);
1184                 return -ENOMEM;
1185         }
1186
1187         __iommu_flush_cache(iommu, root, ROOT_SIZE);
1188
1189         spin_lock_irqsave(&iommu->lock, flags);
1190         iommu->root_entry = root;
1191         spin_unlock_irqrestore(&iommu->lock, flags);
1192
1193         return 0;
1194 }
1195
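/*
 * Program the root table address (tagged as a scalable-mode table when
 * supported) and issue a Set Root Table Pointer command, waiting for the
 * RTPS bit in the global status register to signal completion.
 */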
1196 static void iommu_set_root_entry(struct intel_iommu *iommu)
1197 {
1198         u64 addr;
1199         u32 sts;
1200         unsigned long flag;
1201
1202         addr = virt_to_phys(iommu->root_entry);
1203         if (sm_supported(iommu))
1204                 addr |= DMA_RTADDR_SMT;
1205
1206         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1207         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
1208
1209         writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1210
1211         /* Make sure hardware completes it */
1212         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1213                       readl, (sts & DMA_GSTS_RTPS), sts);
1214
1215         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1216 }
1217
1218 void iommu_flush_write_buffer(struct intel_iommu *iommu)
1219 {
1220         u32 val;
1221         unsigned long flag;
1222
1223         if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1224                 return;
1225
1226         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1227         writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1228
1229         /* Make sure hardware completes it */
1230         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1231                       readl, (!(val & DMA_GSTS_WBFS)), val);
1232
1233         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1234 }
1235
1236 /* return value determines if we need a write buffer flush */
1237 static void __iommu_flush_context(struct intel_iommu *iommu,
1238                                   u16 did, u16 source_id, u8 function_mask,
1239                                   u64 type)
1240 {
1241         u64 val = 0;
1242         unsigned long flag;
1243
1244         switch (type) {
1245         case DMA_CCMD_GLOBAL_INVL:
1246                 val = DMA_CCMD_GLOBAL_INVL;
1247                 break;
1248         case DMA_CCMD_DOMAIN_INVL:
1249                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1250                 break;
1251         case DMA_CCMD_DEVICE_INVL:
1252                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1253                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1254                 break;
1255         default:
1256                 BUG();
1257         }
1258         val |= DMA_CCMD_ICC;
1259
1260         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1261         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1262
1263         /* Make sure hardware completes it */
1264         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1265                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1266
1267         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1268 }
1269
1270 /* return value determines if we need a write buffer flush */
1271 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1272                                 u64 addr, unsigned int size_order, u64 type)
1273 {
1274         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1275         u64 val = 0, val_iva = 0;
1276         unsigned long flag;
1277
1278         switch (type) {
1279         case DMA_TLB_GLOBAL_FLUSH:
1280                 /* global flush doesn't need set IVA_REG */
1281                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1282                 break;
1283         case DMA_TLB_DSI_FLUSH:
1284                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1285                 break;
1286         case DMA_TLB_PSI_FLUSH:
1287                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1288                 /* IH bit is passed in as part of address */
1289                 val_iva = size_order | addr;
1290                 break;
1291         default:
1292                 BUG();
1293         }
1294         /* Note: set drain read/write */
1295 #if 0
1296         /*
1297          * This is probably only needed to be extra safe. It looks like
1298          * we can ignore it without any impact.
1299          */
1300         if (cap_read_drain(iommu->cap))
1301                 val |= DMA_TLB_READ_DRAIN;
1302 #endif
1303         if (cap_write_drain(iommu->cap))
1304                 val |= DMA_TLB_WRITE_DRAIN;
1305
1306         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1307         /* Note: Only uses first TLB reg currently */
1308         if (val_iva)
1309                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1310         dmar_writeq(iommu->reg + tlb_offset + 8, val);
1311
1312         /* Make sure hardware completes it */
1313         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1314                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1315
1316         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1317
1318         /* check IOTLB invalidation granularity */
1319         if (DMA_TLB_IAIG(val) == 0)
1320                 pr_err("Flush IOTLB failed\n");
1321         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1322                 pr_debug("TLB flush request %Lx, actual %Lx\n",
1323                         (unsigned long long)DMA_TLB_IIRG(type),
1324                         (unsigned long long)DMA_TLB_IAIG(val));
1325 }
1326
1327 static struct device_domain_info *
1328 iommu_support_dev_iotlb(struct dmar_domain *domain, struct intel_iommu *iommu,
1329                          u8 bus, u8 devfn)
1330 {
1331         struct device_domain_info *info;
1332
1333         assert_spin_locked(&device_domain_lock);
1334
1335         if (!iommu->qi)
1336                 return NULL;
1337
1338         list_for_each_entry(info, &domain->devices, link)
1339                 if (info->iommu == iommu && info->bus == bus &&
1340                     info->devfn == devfn) {
1341                         if (info->ats_supported && info->dev)
1342                                 return info;
1343                         break;
1344                 }
1345
1346         return NULL;
1347 }
1348
1349 static void domain_update_iotlb(struct dmar_domain *domain)
1350 {
1351         struct device_domain_info *info;
1352         bool has_iotlb_device = false;
1353
1354         assert_spin_locked(&device_domain_lock);
1355
1356         list_for_each_entry(info, &domain->devices, link) {
1357                 struct pci_dev *pdev;
1358
1359                 if (!info->dev || !dev_is_pci(info->dev))
1360                         continue;
1361
1362                 pdev = to_pci_dev(info->dev);
1363                 if (pdev->ats_enabled) {
1364                         has_iotlb_device = true;
1365                         break;
1366                 }
1367         }
1368
1369         domain->has_iotlb_device = has_iotlb_device;
1370 }
1371
1372 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1373 {
1374         struct pci_dev *pdev;
1375
1376         assert_spin_locked(&device_domain_lock);
1377
1378         if (!info || !dev_is_pci(info->dev))
1379                 return;
1380
1381         pdev = to_pci_dev(info->dev);
1382         /* For an IOMMU that supports device IOTLB throttling (DIT), we assign
1383          * PFSID to the invalidation desc of a VF such that IOMMU HW can gauge
1384          * queue depth at PF level. If DIT is not set, PFSID will be treated as
1385          * reserved, which should be set to 0.
1386          */
1387         if (!ecap_dit(info->iommu->ecap))
1388                 info->pfsid = 0;
1389         else {
1390                 struct pci_dev *pf_pdev;
1391
1392                 /* pdev will be returned if device is not a vf */
1393                 pf_pdev = pci_physfn(pdev);
1394                 info->pfsid = PCI_DEVID(pf_pdev->bus->number, pf_pdev->devfn);
1395         }
1396
1397 #ifdef CONFIG_INTEL_IOMMU_SVM
1398         /* The PCIe spec, in its wisdom, declares that the behaviour of
1399            the device if you enable PASID support after ATS support is
1400            undefined. So always enable PASID support on devices which
1401            have it, even if we can't yet know if we're ever going to
1402            use it. */
1403         if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1))
1404                 info->pasid_enabled = 1;
1405
1406         if (info->pri_supported &&
1407             (info->pasid_enabled ? pci_prg_resp_pasid_required(pdev) : 1)  &&
1408             !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32))
1409                 info->pri_enabled = 1;
1410 #endif
1411         if (!pdev->untrusted && info->ats_supported &&
1412             pci_ats_page_aligned(pdev) &&
1413             !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) {
1414                 info->ats_enabled = 1;
1415                 domain_update_iotlb(info->domain);
1416                 info->ats_qdep = pci_ats_queue_depth(pdev);
1417         }
1418 }
1419
1420 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1421 {
1422         struct pci_dev *pdev;
1423
1424         assert_spin_locked(&device_domain_lock);
1425
1426         if (!dev_is_pci(info->dev))
1427                 return;
1428
1429         pdev = to_pci_dev(info->dev);
1430
1431         if (info->ats_enabled) {
1432                 pci_disable_ats(pdev);
1433                 info->ats_enabled = 0;
1434                 domain_update_iotlb(info->domain);
1435         }
1436 #ifdef CONFIG_INTEL_IOMMU_SVM
1437         if (info->pri_enabled) {
1438                 pci_disable_pri(pdev);
1439                 info->pri_enabled = 0;
1440         }
1441         if (info->pasid_enabled) {
1442                 pci_disable_pasid(pdev);
1443                 info->pasid_enabled = 0;
1444         }
1445 #endif
1446 }
1447
1448 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1449                                   u64 addr, unsigned mask)
1450 {
1451         u16 sid, qdep;
1452         unsigned long flags;
1453         struct device_domain_info *info;
1454
1455         if (!domain->has_iotlb_device)
1456                 return;
1457
1458         spin_lock_irqsave(&device_domain_lock, flags);
1459         list_for_each_entry(info, &domain->devices, link) {
1460                 if (!info->ats_enabled)
1461                         continue;
1462
1463                 sid = info->bus << 8 | info->devfn;
1464                 qdep = info->ats_qdep;
1465                 qi_flush_dev_iotlb(info->iommu, sid, info->pfsid,
1466                                 qdep, addr, mask);
1467         }
1468         spin_unlock_irqrestore(&device_domain_lock, flags);
1469 }
1470
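/*
 * Page-selective IOTLB flush for @pages pages starting at @pfn.  The range
 * is rounded up to a power of two and the invalidation hint @ih is passed
 * in bit 6 of the address; if PSI is unsupported or the mask exceeds the
 * MAMV capability, fall back to a domain-selective flush.
 */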
1471 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1472                                   struct dmar_domain *domain,
1473                                   unsigned long pfn, unsigned int pages,
1474                                   int ih, int map)
1475 {
1476         unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1477         uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1478         u16 did = domain->iommu_did[iommu->seq_id];
1479
1480         BUG_ON(pages == 0);
1481
1482         if (ih)
1483                 ih = 1 << 6;
1484         /*
1485          * Fall back to domain-selective flush if there is no PSI support or
1486          * the size is too big.
1487          * PSI requires the page count to be a power of two, and the base
1488          * address to be naturally aligned to the size.
1489          */
1490         if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1491                 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1492                                                 DMA_TLB_DSI_FLUSH);
1493         else
1494                 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1495                                                 DMA_TLB_PSI_FLUSH);
1496
1497         /*
1498          * In caching mode, changes of pages from non-present to present require
1499          * flush. However, device IOTLB doesn't need to be flushed in this case.
1500          */
1501         if (!cap_caching_mode(iommu->cap) || !map)
1502                 iommu_flush_dev_iotlb(domain, addr, mask);
1503 }
1504
1505 /* Notification for newly created mappings */
1506 static inline void __mapping_notify_one(struct intel_iommu *iommu,
1507                                         struct dmar_domain *domain,
1508                                         unsigned long pfn, unsigned int pages)
1509 {
1510         /* It's a non-present to present mapping. Only flush if caching mode */
1511         if (cap_caching_mode(iommu->cap))
1512                 iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
1513         else
1514                 iommu_flush_write_buffer(iommu);
1515 }
1516
1517 static void iommu_flush_iova(struct iova_domain *iovad)
1518 {
1519         struct dmar_domain *domain;
1520         int idx;
1521
1522         domain = container_of(iovad, struct dmar_domain, iovad);
1523
1524         for_each_domain_iommu(idx, domain) {
1525                 struct intel_iommu *iommu = g_iommus[idx];
1526                 u16 did = domain->iommu_did[iommu->seq_id];
1527
1528                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
1529
1530                 if (!cap_caching_mode(iommu->cap))
1531                         iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1532                                               0, MAX_AGAW_PFN_WIDTH);
1533         }
1534 }
1535
1536 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1537 {
1538         u32 pmen;
1539         unsigned long flags;
1540
1541         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1542         pmen = readl(iommu->reg + DMAR_PMEN_REG);
1543         pmen &= ~DMA_PMEN_EPM;
1544         writel(pmen, iommu->reg + DMAR_PMEN_REG);
1545
1546         /* wait for the protected region status bit to clear */
1547         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1548                 readl, !(pmen & DMA_PMEN_PRS), pmen);
1549
1550         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1551 }
1552
1553 static void iommu_enable_translation(struct intel_iommu *iommu)
1554 {
1555         u32 sts;
1556         unsigned long flags;
1557
1558         raw_spin_lock_irqsave(&iommu->register_lock, flags);
1559         iommu->gcmd |= DMA_GCMD_TE;
1560         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1561
1562         /* Make sure the hardware completes it */
1563         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1564                       readl, (sts & DMA_GSTS_TES), sts);
1565
1566         raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1567 }
1568
1569 static void iommu_disable_translation(struct intel_iommu *iommu)
1570 {
1571         u32 sts;
1572         unsigned long flag;
1573
1574         raw_spin_lock_irqsave(&iommu->register_lock, flag);
1575         iommu->gcmd &= ~DMA_GCMD_TE;
1576         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1577
1578         /* Make sure the hardware completes it */
1579         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1580                       readl, (!(sts & DMA_GSTS_TES)), sts);
1581
1582         raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1583 }
1584
1585
1586 static int iommu_init_domains(struct intel_iommu *iommu)
1587 {
1588         u32 ndomains, nlongs;
1589         size_t size;
1590
1591         ndomains = cap_ndoms(iommu->cap);
1592         pr_debug("%s: Number of Domains supported <%d>\n",
1593                  iommu->name, ndomains);
1594         nlongs = BITS_TO_LONGS(ndomains);
1595
1596         spin_lock_init(&iommu->lock);
1597
1598         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1599         if (!iommu->domain_ids) {
1600                 pr_err("%s: Allocating domain id array failed\n",
1601                        iommu->name);
1602                 return -ENOMEM;
1603         }
1604
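             /*
              * iommu->domains is a two-level table: the top level holds one
              * pointer per block of 256 domain-ids, each block being a
              * 256-entry array of dmar_domain pointers.  Only the first
              * block (domain-ids 0-255) is allocated up front here; the
              * remaining blocks are allocated once a domain-id in their
              * range is used.
              */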
1605         size = (ALIGN(ndomains, 256) >> 8) * sizeof(struct dmar_domain **);
1606         iommu->domains = kzalloc(size, GFP_KERNEL);
1607
1608         if (iommu->domains) {
1609                 size = 256 * sizeof(struct dmar_domain *);
1610                 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1611         }
1612
1613         if (!iommu->domains || !iommu->domains[0]) {
1614                 pr_err("%s: Allocating domain array failed\n",
1615                        iommu->name);
1616                 kfree(iommu->domain_ids);
1617                 kfree(iommu->domains);
1618                 iommu->domain_ids = NULL;
1619                 iommu->domains    = NULL;
1620                 return -ENOMEM;
1621         }
1622
1625         /*
1626          * If Caching mode is set, then invalid translations are tagged
1627          * with domain-id 0, hence we need to pre-allocate it. We also
1628          * use domain-id 0 as a marker for non-allocated domain-id, so
1629          * make sure it is not used for a real domain.
1630          */
1631         set_bit(0, iommu->domain_ids);
1632
1633         /*
1634          * The VT-d spec rev 3.0 (section 6.2.3.1) requires that each PASID
1635          * entry for first-level or pass-through translation modes should
1636          * be programmed with a domain id different from those used for
1637          * second-level or nested translation. We reserve a domain id for
1638          * this purpose.
1639          */
1640         if (sm_supported(iommu))
1641                 set_bit(FLPT_DEFAULT_DID, iommu->domain_ids);
1642
1643         return 0;
1644 }
1645
1646 static void disable_dmar_iommu(struct intel_iommu *iommu)
1647 {
1648         struct device_domain_info *info, *tmp;
1649         unsigned long flags;
1650
1651         if (!iommu->domains || !iommu->domain_ids)
1652                 return;
1653
1654 again:
1655         spin_lock_irqsave(&device_domain_lock, flags);
1656         list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1657                 struct dmar_domain *domain;
1658
1659                 if (info->iommu != iommu)
1660                         continue;
1661
1662                 if (!info->dev || !info->domain)
1663                         continue;
1664
1665                 domain = info->domain;
1666
1667                 __dmar_remove_one_dev_info(info);
1668
1669                 if (!domain_type_is_vm_or_si(domain)) {
1670                         /*
1671                          * The domain_exit() function can't be called under
1672                          * device_domain_lock, as it takes this lock itself.
1673                          * So release the lock here and re-run the loop
1674                          * afterwards.
1675                          */
1676                         spin_unlock_irqrestore(&device_domain_lock, flags);
1677                         domain_exit(domain);
1678                         goto again;
1679                 }
1680         }
1681         spin_unlock_irqrestore(&device_domain_lock, flags);
1682
1683         if (iommu->gcmd & DMA_GCMD_TE)
1684                 iommu_disable_translation(iommu);
1685 }
1686
1687 static void free_dmar_iommu(struct intel_iommu *iommu)
1688 {
1689         if ((iommu->domains) && (iommu->domain_ids)) {
1690                 int elems = ALIGN(cap_ndoms(iommu->cap), 256) >> 8;
1691                 int i;
1692
1693                 for (i = 0; i < elems; i++)
1694                         kfree(iommu->domains[i]);
1695                 kfree(iommu->domains);
1696                 kfree(iommu->domain_ids);
1697                 iommu->domains = NULL;
1698                 iommu->domain_ids = NULL;
1699         }
1700
1701         g_iommus[iommu->seq_id] = NULL;
1702
1703         /* free context mapping */
1704         free_context_table(iommu);
1705
1706 #ifdef CONFIG_INTEL_IOMMU_SVM
1707         if (pasid_supported(iommu)) {
1708                 if (ecap_prs(iommu->ecap))
1709                         intel_svm_finish_prq(iommu);
1710         }
1711 #endif
1712 }
1713
1714 static struct dmar_domain *alloc_domain(int flags)
1715 {
1716         struct dmar_domain *domain;
1717
1718         domain = alloc_domain_mem();
1719         if (!domain)
1720                 return NULL;
1721
1722         memset(domain, 0, sizeof(*domain));
1723         domain->nid = NUMA_NO_NODE;
1724         domain->flags = flags;
1725         domain->has_iotlb_device = false;
1726         INIT_LIST_HEAD(&domain->devices);
1727
1728         return domain;
1729 }
1730
1731 /* Must be called with iommu->lock */
1732 static int domain_attach_iommu(struct dmar_domain *domain,
1733                                struct intel_iommu *iommu)
1734 {
1735         unsigned long ndomains;
1736         int num;
1737
1738         assert_spin_locked(&device_domain_lock);
1739         assert_spin_locked(&iommu->lock);
1740
1741         domain->iommu_refcnt[iommu->seq_id] += 1;
1742         domain->iommu_count += 1;
1743         if (domain->iommu_refcnt[iommu->seq_id] == 1) {
1744                 ndomains = cap_ndoms(iommu->cap);
1745                 num      = find_first_zero_bit(iommu->domain_ids, ndomains);
1746
1747                 if (num >= ndomains) {
1748                         pr_err("%s: No free domain ids\n", iommu->name);
1749                         domain->iommu_refcnt[iommu->seq_id] -= 1;
1750                         domain->iommu_count -= 1;
1751                         return -ENOSPC;
1752                 }
1753
1754                 set_bit(num, iommu->domain_ids);
1755                 set_iommu_domain(iommu, num, domain);
1756
1757                 domain->iommu_did[iommu->seq_id] = num;
1758                 domain->nid                      = iommu->node;
1759
1760                 domain_update_iommu_cap(domain);
1761         }
1762
1763         return 0;
1764 }
1765
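     /*
      * Drop the domain's reference on this IOMMU.  When the last device
      * behind the IOMMU leaves the domain, release the domain-id back to
      * the bitmap.  Returns the domain's remaining iommu_count.  Like
      * domain_attach_iommu(), this must be called with device_domain_lock
      * and iommu->lock held.
      */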
1766 static int domain_detach_iommu(struct dmar_domain *domain,
1767                                struct intel_iommu *iommu)
1768 {
1769         int num, count;
1770
1771         assert_spin_locked(&device_domain_lock);
1772         assert_spin_locked(&iommu->lock);
1773
1774         domain->iommu_refcnt[iommu->seq_id] -= 1;
1775         count = --domain->iommu_count;
1776         if (domain->iommu_refcnt[iommu->seq_id] == 0) {
1777                 num = domain->iommu_did[iommu->seq_id];
1778                 clear_bit(num, iommu->domain_ids);
1779                 set_iommu_domain(iommu, num, NULL);
1780
1781                 domain_update_iommu_cap(domain);
1782                 domain->iommu_did[iommu->seq_id] = 0;
1783         }
1784
1785         return count;
1786 }
1787
1788 static struct iova_domain reserved_iova_list;
1789 static struct lock_class_key reserved_rbtree_key;
1790
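     /*
      * Pre-populate a global IOVA tree with ranges that must never be
      * handed out as DMA addresses: the IOAPIC MMIO window and every PCI
      * MMIO BAR (so a DMA address is never mistaken for a peer-to-peer
      * access).  The tree is later copied into each new domain by
      * domain_reserve_special_ranges().
      */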
1791 static int dmar_init_reserved_ranges(void)
1792 {
1793         struct pci_dev *pdev = NULL;
1794         struct iova *iova;
1795         int i;
1796
1797         init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN);
1798
1799         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1800                 &reserved_rbtree_key);
1801
1802         /* IOAPIC ranges shouldn't be accessed by DMA */
1803         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1804                 IOVA_PFN(IOAPIC_RANGE_END));
1805         if (!iova) {
1806                 pr_err("Reserve IOAPIC range failed\n");
1807                 return -ENODEV;
1808         }
1809
1810         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1811         for_each_pci_dev(pdev) {
1812                 struct resource *r;
1813
1814                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1815                         r = &pdev->resource[i];
1816                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1817                                 continue;
1818                         iova = reserve_iova(&reserved_iova_list,
1819                                             IOVA_PFN(r->start),
1820                                             IOVA_PFN(r->end));
1821                         if (!iova) {
1822                                 pci_err(pdev, "Reserve iova for %pR failed\n", r);
1823                                 return -ENODEV;
1824                         }
1825                 }
1826         }
1827         return 0;
1828 }
1829
1830 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1831 {
1832         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1833 }
1834
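     /*
      * Round the guest address width up so that the bits above the 12-bit
      * page offset form whole 9-bit page-table levels, capping at 64.
      * For example, gaw = 48 is already 12 + 4 * 9 and is returned as-is,
      * while gaw = 50 is rounded up to 57 (12 + 5 * 9).
      */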
1835 static inline int guestwidth_to_adjustwidth(int gaw)
1836 {
1837         int agaw;
1838         int r = (gaw - 12) % 9;
1839
1840         if (r == 0)
1841                 agaw = gaw;
1842         else
1843                 agaw = gaw + 9 - r;
1844         if (agaw > 64)
1845                 agaw = 64;
1846         return agaw;
1847 }
1848
1849 static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1850                        int guest_width)
1851 {
1852         int adjust_width, agaw;
1853         unsigned long sagaw;
1854         int err;
1855
1856         init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
1857
1858         err = init_iova_flush_queue(&domain->iovad,
1859                                     iommu_flush_iova, iova_entry_free);
1860         if (err)
1861                 return err;
1862
1863         domain_reserve_special_ranges(domain);
1864
1865         /* calculate AGAW */
1866         if (guest_width > cap_mgaw(iommu->cap))
1867                 guest_width = cap_mgaw(iommu->cap);
1868         domain->gaw = guest_width;
1869         adjust_width = guestwidth_to_adjustwidth(guest_width);
1870         agaw = width_to_agaw(adjust_width);
1871         sagaw = cap_sagaw(iommu->cap);
1872         if (!test_bit(agaw, &sagaw)) {
1873                 /* hardware doesn't support it, choose a bigger one */
1874                 pr_debug("Hardware doesn't support agaw %d\n", agaw);
1875                 agaw = find_next_bit(&sagaw, 5, agaw);
1876                 if (agaw >= 5)
1877                         return -ENODEV;
1878         }
1879         domain->agaw = agaw;
1880
1881         if (ecap_coherent(iommu->ecap))
1882                 domain->iommu_coherency = 1;
1883         else
1884                 domain->iommu_coherency = 0;
1885
1886         if (ecap_sc_support(iommu->ecap))
1887                 domain->iommu_snooping = 1;
1888         else
1889                 domain->iommu_snooping = 0;
1890
1891         if (intel_iommu_superpage)
1892                 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1893         else
1894                 domain->iommu_superpage = 0;
1895
1896         domain->nid = iommu->node;
1897
1898         /* always allocate the top pgd */
1899         domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1900         if (!domain->pgd)
1901                 return -ENOMEM;
1902         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1903         return 0;
1904 }
1905
1906 static void domain_exit(struct dmar_domain *domain)
1907 {
1908         struct page *freelist;
1909
1910         /* Remove associated devices and clear attached or cached domains */
1911         rcu_read_lock();
1912         domain_remove_dev_info(domain);
1913         rcu_read_unlock();
1914
1915         /* destroy iovas */
1916         put_iova_domain(&domain->iovad);
1917
1918         freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1919
1920         dma_free_pagelist(freelist);
1921
1922         free_domain_mem(domain);
1923 }
1924
1925 /*
1926  * Get the PASID directory size for scalable mode context entry.
1927  * Value of X in the PDTS field of a scalable mode context entry
1928  * indicates PASID directory with 2^(X + 7) entries.
1929  */
1930 static inline unsigned long context_get_sm_pds(struct pasid_table *table)
1931 {
1932         int pds, max_pde;
1933
1934         max_pde = table->max_pasid >> PASID_PDE_SHIFT;
1935         pds = find_first_bit((unsigned long *)&max_pde, MAX_NR_PASID_BITS);
1936         if (pds < 7)
1937                 return 0;
1938
1939         return pds - 7;
1940 }
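     /*
      * Worked example (assuming PASID_PDE_SHIFT is 6, i.e. 64 PASIDs per
      * directory entry): max_pasid = 0x10000 gives max_pde = 0x400, whose
      * set bit is bit 10, so pds = 10 and the function returns 3, i.e. a
      * PASID directory with 2^(3 + 7) = 1024 entries covering 1024 * 64
      * PASIDs.
      */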
1941
1942 /*
1943  * Set the RID_PASID field of a scalable mode context entry. The
1944  * IOMMU hardware will use the PASID value set in this field for
1945  * DMA translations of DMA requests without PASID.
1946  */
1947 static inline void
1948 context_set_sm_rid2pasid(struct context_entry *context, unsigned long pasid)
1949 {
1950         context->hi |= pasid & ((1 << 20) - 1);
1951         context->hi |= (1 << 20);
1952 }
1953
1954 /*
1955  * Set the DTE(Device-TLB Enable) field of a scalable mode context
1956  * entry.
1957  */
1958 static inline void context_set_sm_dte(struct context_entry *context)
1959 {
1960         context->lo |= (1 << 2);
1961 }
1962
1963 /*
1964  * Set the PRE(Page Request Enable) field of a scalable mode context
1965  * entry.
1966  */
1967 static inline void context_set_sm_pre(struct context_entry *context)
1968 {
1969         context->lo |= (1 << 4);
1970 }
1971
1972 /* Convert value to context PASID directory size field coding. */
1973 #define context_pdts(pds)       (((pds) & 0x7) << 9)
1974
1975 static int domain_context_mapping_one(struct dmar_domain *domain,
1976                                       struct intel_iommu *iommu,
1977                                       struct pasid_table *table,
1978                                       u8 bus, u8 devfn)
1979 {
1980         u16 did = domain->iommu_did[iommu->seq_id];
1981         int translation = CONTEXT_TT_MULTI_LEVEL;
1982         struct device_domain_info *info = NULL;
1983         struct context_entry *context;
1984         unsigned long flags;
1985         int ret;
1986
1987         WARN_ON(did == 0);
1988
1989         if (hw_pass_through && domain_type_is_si(domain))
1990                 translation = CONTEXT_TT_PASS_THROUGH;
1991
1992         pr_debug("Set context mapping for %02x:%02x.%d\n",
1993                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1994
1995         BUG_ON(!domain->pgd);
1996
1997         spin_lock_irqsave(&device_domain_lock, flags);
1998         spin_lock(&iommu->lock);
1999
2000         ret = -ENOMEM;
2001         context = iommu_context_addr(iommu, bus, devfn, 1);
2002         if (!context)
2003                 goto out_unlock;
2004
2005         ret = 0;
2006         if (context_present(context))
2007                 goto out_unlock;
2008
2009         /*
2010          * For kdump cases, old valid entries may be cached due to in-flight
2011          * DMA and the copied page tables, but there is no unmap operation
2012          * for them, so the newly mapped device needs an explicit flush of
2013          * those cached entries. By this point the device is expected to
2014          * have finished its reset during driver probe, so no in-flight DMA
2015          * remains and we don't need to worry about stale entries from
2016          * here on.
2017          */
2018         if (context_copied(context)) {
2019                 u16 did_old = context_domain_id(context);
2020
2021                 if (did_old < cap_ndoms(iommu->cap)) {
2022                         iommu->flush.flush_context(iommu, did_old,
2023                                                    (((u16)bus) << 8) | devfn,
2024                                                    DMA_CCMD_MASK_NOBIT,
2025                                                    DMA_CCMD_DEVICE_INVL);
2026                         iommu->flush.flush_iotlb(iommu, did_old, 0, 0,
2027                                                  DMA_TLB_DSI_FLUSH);
2028                 }
2029         }
2030
2031         context_clear_entry(context);
2032
2033         if (sm_supported(iommu)) {
2034                 unsigned long pds;
2035
2036                 WARN_ON(!table);
2037
2038                 /* Set up the PASID DIR pointer: */
2039                 pds = context_get_sm_pds(table);
2040                 context->lo = (u64)virt_to_phys(table->table) |
2041                                 context_pdts(pds);
2042
2043                 /* Set up the RID_PASID field: */
2044                 context_set_sm_rid2pasid(context, PASID_RID2PASID);
2045
2046                 /*
2047                  * Set up the Device-TLB Enable bit and the Page Request
2048                  * Enable bit:
2049                  */
2050                 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2051                 if (info && info->ats_supported)
2052                         context_set_sm_dte(context);
2053                 if (info && info->pri_supported)
2054                         context_set_sm_pre(context);
2055         } else {
2056                 struct dma_pte *pgd = domain->pgd;
2057                 int agaw;
2058
2059                 context_set_domain_id(context, did);
2060
2061                 if (translation != CONTEXT_TT_PASS_THROUGH) {
2062                         /*
2063                          * Skip top levels of page tables for iommu which has
2064                          * less agaw than default. Unnecessary for PT mode.
2065                          */
2066                         for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
2067                                 ret = -ENOMEM;
2068                                 pgd = phys_to_virt(dma_pte_addr(pgd));
2069                                 if (!dma_pte_present(pgd))
2070                                         goto out_unlock;
2071                         }
2072
2073                         info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
2074                         if (info && info->ats_supported)
2075                                 translation = CONTEXT_TT_DEV_IOTLB;
2076                         else
2077                                 translation = CONTEXT_TT_MULTI_LEVEL;
2078
2079                         context_set_address_root(context, virt_to_phys(pgd));
2080                         context_set_address_width(context, agaw);
2081                 } else {
2082                         /*
2083                          * In pass through mode, AW must be programmed to
2084                          * indicate the largest AGAW value supported by
2085                          * hardware. And ASR is ignored by hardware.
2086                          */
2087                         context_set_address_width(context, iommu->msagaw);
2088                 }
2089
2090                 context_set_translation_type(context, translation);
2091         }
2092
2093         context_set_fault_enable(context);
2094         context_set_present(context);
2095         domain_flush_cache(domain, context, sizeof(*context));
2096
2097         /*
2098          * It's a non-present to present mapping. If hardware doesn't cache
2099          * non-present entries we only need to flush the write-buffer. If it
2100          * _does_ cache non-present entries, then it does so in the special
2101          * domain #0, which we have to flush:
2102          */
2103         if (cap_caching_mode(iommu->cap)) {
2104                 iommu->flush.flush_context(iommu, 0,
2105                                            (((u16)bus) << 8) | devfn,
2106                                            DMA_CCMD_MASK_NOBIT,
2107                                            DMA_CCMD_DEVICE_INVL);
2108                 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
2109         } else {
2110                 iommu_flush_write_buffer(iommu);
2111         }
2112         iommu_enable_dev_iotlb(info);
2113
2114         ret = 0;
2115
2116 out_unlock:
2117         spin_unlock(&iommu->lock);
2118         spin_unlock_irqrestore(&device_domain_lock, flags);
2119
2120         return ret;
2121 }
2122
2123 struct domain_context_mapping_data {
2124         struct dmar_domain *domain;
2125         struct intel_iommu *iommu;
2126         struct pasid_table *table;
2127 };
2128
2129 static int domain_context_mapping_cb(struct pci_dev *pdev,
2130                                      u16 alias, void *opaque)
2131 {
2132         struct domain_context_mapping_data *data = opaque;
2133
2134         return domain_context_mapping_one(data->domain, data->iommu,
2135                                           data->table, PCI_BUS_NUM(alias),
2136                                           alias & 0xff);
2137 }
2138
2139 static int
2140 domain_context_mapping(struct dmar_domain *domain, struct device *dev)
2141 {
2142         struct domain_context_mapping_data data;
2143         struct pasid_table *table;
2144         struct intel_iommu *iommu;
2145         u8 bus, devfn;
2146
2147         iommu = device_to_iommu(dev, &bus, &devfn);
2148         if (!iommu)
2149                 return -ENODEV;
2150
2151         table = intel_pasid_get_table(dev);
2152
2153         if (!dev_is_pci(dev))
2154                 return domain_context_mapping_one(domain, iommu, table,
2155                                                   bus, devfn);
2156
2157         data.domain = domain;
2158         data.iommu = iommu;
2159         data.table = table;
2160
2161         return pci_for_each_dma_alias(to_pci_dev(dev),
2162                                       &domain_context_mapping_cb, &data);
2163 }
2164
2165 static int domain_context_mapped_cb(struct pci_dev *pdev,
2166                                     u16 alias, void *opaque)
2167 {
2168         struct intel_iommu *iommu = opaque;
2169
2170         return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
2171 }
2172
2173 static int domain_context_mapped(struct device *dev)
2174 {
2175         struct intel_iommu *iommu;
2176         u8 bus, devfn;
2177
2178         iommu = device_to_iommu(dev, &bus, &devfn);
2179         if (!iommu)
2180                 return -ENODEV;
2181
2182         if (!dev_is_pci(dev))
2183                 return device_context_mapped(iommu, bus, devfn);
2184
2185         return !pci_for_each_dma_alias(to_pci_dev(dev),
2186                                        domain_context_mapped_cb, iommu);
2187 }
2188
2189 /* Returns a number of VTD pages, but aligned to MM page size */
2190 static inline unsigned long aligned_nrpages(unsigned long host_addr,
2191                                             size_t size)
2192 {
2193         host_addr &= ~PAGE_MASK;
2194         return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2195 }
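     /*
      * For example, with 4KiB MM and VT-d pages, an offset of 0x200 and a
      * size of 0x1000 span two pages, so two VT-d PFNs are returned even
      * though the size alone would fit in one page.
      */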
2196
2197 /* Return largest possible superpage level for a given mapping */
2198 static inline int hardware_largepage_caps(struct dmar_domain *domain,
2199                                           unsigned long iov_pfn,
2200                                           unsigned long phy_pfn,
2201                                           unsigned long pages)
2202 {
2203         int support, level = 1;
2204         unsigned long pfnmerge;
2205
2206         support = domain->iommu_superpage;
2207
2208         /* To use a large page, the virtual *and* physical addresses
2209            must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2210            of them will mean we have to use smaller pages. So just
2211            merge them and check both at once. */
2212         pfnmerge = iov_pfn | phy_pfn;
2213
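             /*
              * Each level above the first covers VTD_STRIDE_SHIFT (9) more
              * PFN bits: level 1 is a 4KiB page, level 2 a 2MiB superpage,
              * level 3 a 1GiB superpage.  Stop raising the level once the
              * merged PFN is no longer aligned to the next level, the
              * remaining page count cannot fill it, or hardware support
              * (domain->iommu_superpage) runs out.
              */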
2214         while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2215                 pages >>= VTD_STRIDE_SHIFT;
2216                 if (!pages)
2217                         break;
2218                 pfnmerge >>= VTD_STRIDE_SHIFT;
2219                 level++;
2220                 support--;
2221         }
2222         return level;
2223 }
2224
2225 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2226                             struct scatterlist *sg, unsigned long phys_pfn,
2227                             unsigned long nr_pages, int prot)
2228 {
2229         struct dma_pte *first_pte = NULL, *pte = NULL;
2230         phys_addr_t uninitialized_var(pteval);
2231         unsigned long sg_res = 0;
2232         unsigned int largepage_lvl = 0;
2233         unsigned long lvl_pages = 0;
2234
2235         BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
2236
2237         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2238                 return -EINVAL;
2239
2240         prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2241
2242         if (!sg) {
2243                 sg_res = nr_pages;
2244                 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2245         }
2246
2247         while (nr_pages > 0) {
2248                 uint64_t tmp;
2249
2250                 if (!sg_res) {
2251                         unsigned int pgoff = sg->offset & ~PAGE_MASK;
2252
2253                         sg_res = aligned_nrpages(sg->offset, sg->length);
2254                         sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + pgoff;
2255                         sg->dma_length = sg->length;
2256                         pteval = (sg_phys(sg) - pgoff) | prot;
2257                         phys_pfn = pteval >> VTD_PAGE_SHIFT;
2258                 }
2259
2260                 if (!pte) {
2261                         largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2262
2263                         first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2264                         if (!pte)
2265                                 return -ENOMEM;
2266                         /* It is a large page */
2267                         if (largepage_lvl > 1) {
2268                                 unsigned long nr_superpages, end_pfn;
2269
2270                                 pteval |= DMA_PTE_LARGE_PAGE;
2271                                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2272
2273                                 nr_superpages = sg_res / lvl_pages;
2274                                 end_pfn = iov_pfn + nr_superpages * lvl_pages - 1;
2275
2276                                 /*
2277                                  * Ensure that old small page tables are
2278                                  * removed to make room for superpage(s).
2279                                  * We're adding new large pages, so make sure
2280                                  * we don't remove their parent tables.
2281                                  */
2282                                 dma_pte_free_pagetable(domain, iov_pfn, end_pfn,
2283                                                        largepage_lvl + 1);
2284                         } else {
2285                                 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2286                         }
2287
2288                 }
2289                 /* We don't need a lock here; nobody else
2290                  * touches this IOVA range.
2291                  */
2292                 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2293                 if (tmp) {
2294                         static int dumps = 5;
2295                         pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2296                                 iov_pfn, tmp, (unsigned long long)pteval);
2297                         if (dumps) {
2298                                 dumps--;
2299                                 debug_dma_dump_mappings(NULL);
2300                         }
2301                         WARN_ON(1);
2302                 }
2303
2304                 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2305
2306                 BUG_ON(nr_pages < lvl_pages);
2307                 BUG_ON(sg_res < lvl_pages);
2308
2309                 nr_pages -= lvl_pages;
2310                 iov_pfn += lvl_pages;
2311                 phys_pfn += lvl_pages;
2312                 pteval += lvl_pages * VTD_PAGE_SIZE;
2313                 sg_res -= lvl_pages;
2314
2315                 /* If the next PTE would be the first in a new page, then we
2316                    need to flush the cache on the entries we've just written.
2317                    And then we'll need to recalculate 'pte', so clear it and
2318                    let it get set again in the if (!pte) block above.
2319
2320                    If we're done (!nr_pages) we need to flush the cache too.
2321
2322                    Also if we've been setting superpages, we may need to
2323                    recalculate 'pte' and switch back to smaller pages for the
2324                    end of the mapping, if the trailing size is not enough to
2325                    use another superpage (i.e. sg_res < lvl_pages). */
2326                 pte++;
2327                 if (!nr_pages || first_pte_in_page(pte) ||
2328                     (largepage_lvl > 1 && sg_res < lvl_pages)) {
2329                         domain_flush_cache(domain, first_pte,
2330                                            (void *)pte - (void *)first_pte);
2331                         pte = NULL;
2332                 }
2333
2334                 if (!sg_res && nr_pages)
2335                         sg = sg_next(sg);
2336         }
2337         return 0;
2338 }
2339
2340 static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2341                          struct scatterlist *sg, unsigned long phys_pfn,
2342                          unsigned long nr_pages, int prot)
2343 {
2344         int ret;
2345         struct intel_iommu *iommu;
2346
2347         /* Do the real mapping first */
2348         ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot);
2349         if (ret)
2350                 return ret;
2351
2352         /* Notify about the new mapping */
2353         if (domain_type_is_vm(domain)) {
2354                 /* VM typed domains can have more than one IOMMU */
2355                 int iommu_id;
2356                 for_each_domain_iommu(iommu_id, domain) {
2357                         iommu = g_iommus[iommu_id];
2358                         __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2359                 }
2360         } else {
2361                 /* General domains only have one IOMMU */
2362                 iommu = domain_get_iommu(domain);
2363                 __mapping_notify_one(iommu, domain, iov_pfn, nr_pages);
2364         }
2365
2366         return 0;
2367 }
2368
2369 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2370                                     struct scatterlist *sg, unsigned long nr_pages,
2371                                     int prot)
2372 {
2373         return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2374 }
2375
2376 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2377                                      unsigned long phys_pfn, unsigned long nr_pages,
2378                                      int prot)
2379 {
2380         return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2381 }
2382
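     /*
      * Tear down the context entry for bus/devfn and invalidate both the
      * context-cache entry and the IOTLB entries tagged with the old
      * domain-id, so the hardware stops using the stale translation.
      */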
2383 static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
2384 {
2385         unsigned long flags;
2386         struct context_entry *context;
2387         u16 did_old;
2388
2389         if (!iommu)
2390                 return;
2391
2392         spin_lock_irqsave(&iommu->lock, flags);
2393         context = iommu_context_addr(iommu, bus, devfn, 0);
2394         if (!context) {
2395                 spin_unlock_irqrestore(&iommu->lock, flags);
2396                 return;
2397         }
2398         did_old = context_domain_id(context);
2399         context_clear_entry(context);
2400         __iommu_flush_cache(iommu, context, sizeof(*context));
2401         spin_unlock_irqrestore(&iommu->lock, flags);
2402         iommu->flush.flush_context(iommu,
2403                                    did_old,
2404                                    (((u16)bus) << 8) | devfn,
2405                                    DMA_CCMD_MASK_NOBIT,
2406                                    DMA_CCMD_DEVICE_INVL);
2407         iommu->flush.flush_iotlb(iommu,
2408                                  did_old,
2409                                  0,
2410                                  0,
2411                                  DMA_TLB_DSI_FLUSH);
2412 }
2413
2414 static inline void unlink_domain_info(struct device_domain_info *info)
2415 {
2416         assert_spin_locked(&device_domain_lock);
2417         list_del(&info->link);
2418         list_del(&info->global);
2419         if (info->dev)
2420                 info->dev->archdata.iommu = NULL;
2421 }
2422
2423 static void domain_remove_dev_info(struct dmar_domain *domain)
2424 {
2425         struct device_domain_info *info, *tmp;
2426         unsigned long flags;
2427
2428         spin_lock_irqsave(&device_domain_lock, flags);
2429         list_for_each_entry_safe(info, tmp, &domain->devices, link)
2430                 __dmar_remove_one_dev_info(info);
2431         spin_unlock_irqrestore(&device_domain_lock, flags);
2432 }
2433
2434 /*
2435  * find_domain
2436  * Note: we use struct device->archdata.iommu to store the info
2437  */
2438 static struct dmar_domain *find_domain(struct device *dev)
2439 {
2440         struct device_domain_info *info;
2441
2442         /* No lock here; we assume no concurrent domain exit in the normal case */
2443         info = dev->archdata.iommu;
2444         if (likely(info))
2445                 return info->domain;
2446         return NULL;
2447 }
2448
2449 static inline struct device_domain_info *
2450 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2451 {
2452         struct device_domain_info *info;
2453
2454         list_for_each_entry(info, &device_domain_list, global)
2455                 if (info->iommu->segment == segment && info->bus == bus &&
2456                     info->devfn == devfn)
2457                         return info;
2458
2459         return NULL;
2460 }
2461
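     /*
      * Bind a device (or one of its DMA aliases) to a domain: allocate a
      * device_domain_info, probe ATS/PASID/PRI support, attach the domain
      * to the IOMMU, set up the PASID table and RID2PASID entry in scalable
      * mode, and install the context entry.  If the device already picked
      * up a domain in the meantime, that existing domain is returned
      * instead and the caller must free the one it passed in.
      */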
2462 static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2463                                                     int bus, int devfn,
2464                                                     struct device *dev,
2465                                                     struct dmar_domain *domain)
2466 {
2467         struct dmar_domain *found = NULL;
2468         struct device_domain_info *info;
2469         unsigned long flags;
2470         int ret;
2471
2472         info = alloc_devinfo_mem();
2473         if (!info)
2474                 return NULL;
2475
2476         info->bus = bus;
2477         info->devfn = devfn;
2478         info->ats_supported = info->pasid_supported = info->pri_supported = 0;
2479         info->ats_enabled = info->pasid_enabled = info->pri_enabled = 0;
2480         info->ats_qdep = 0;
2481         info->dev = dev;
2482         info->domain = domain;
2483         info->iommu = iommu;
2484         info->pasid_table = NULL;
2485
2486         if (dev && dev_is_pci(dev)) {
2487                 struct pci_dev *pdev = to_pci_dev(info->dev);
2488
2489                 if (!pdev->untrusted &&
2490                     !pci_ats_disabled() &&
2491                     ecap_dev_iotlb_support(iommu->ecap) &&
2492                     pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) &&
2493                     dmar_find_matched_atsr_unit(pdev))
2494                         info->ats_supported = 1;
2495
2496                 if (sm_supported(iommu)) {
2497                         if (pasid_supported(iommu)) {
2498                                 int features = pci_pasid_features(pdev);
2499                                 if (features >= 0)
2500                                         info->pasid_supported = features | 1;
2501                         }
2502
2503                         if (info->ats_supported && ecap_prs(iommu->ecap) &&
2504                             pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI))
2505                                 info->pri_supported = 1;
2506                 }
2507         }
2508
2509         spin_lock_irqsave(&device_domain_lock, flags);
2510         if (dev)
2511                 found = find_domain(dev);
2512
2513         if (!found) {
2514                 struct device_domain_info *info2;
2515                 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2516                 if (info2) {
2517                         found      = info2->domain;
2518                         info2->dev = dev;
2519                 }
2520         }
2521
2522         if (found) {
2523                 spin_unlock_irqrestore(&device_domain_lock, flags);
2524                 free_devinfo_mem(info);
2525                 /* Caller must free the original domain */
2526                 return found;
2527         }
2528
2529         spin_lock(&iommu->lock);
2530         ret = domain_attach_iommu(domain, iommu);
2531         spin_unlock(&iommu->lock);
2532
2533         if (ret) {
2534                 spin_unlock_irqrestore(&device_domain_lock, flags);
2535                 free_devinfo_mem(info);
2536                 return NULL;
2537         }
2538
2539         list_add(&info->link, &domain->devices);
2540         list_add(&info->global, &device_domain_list);
2541         if (dev)
2542                 dev->archdata.iommu = info;
2543         spin_unlock_irqrestore(&device_domain_lock, flags);
2544
2545         /* PASID table is mandatory for a PCI device in scalable mode. */
2546         if (dev && dev_is_pci(dev) && sm_supported(iommu)) {
2547                 ret = intel_pasid_alloc_table(dev);
2548                 if (ret) {
2549                         dev_err(dev, "PASID table allocation failed\n");
2550                         dmar_remove_one_dev_info(dev);
2551                         return NULL;
2552                 }
2553
2554                 /* Set up the PASID entry for requests without PASID: */
2555                 spin_lock(&iommu->lock);
2556                 if (hw_pass_through && domain_type_is_si(domain))
2557                         ret = intel_pasid_setup_pass_through(iommu, domain,
2558                                         dev, PASID_RID2PASID);
2559                 else
2560                         ret = intel_pasid_setup_second_level(iommu, domain,
2561                                         dev, PASID_RID2PASID);
2562                 spin_unlock(&iommu->lock);
2563                 if (ret) {
2564                         dev_err(dev, "Setup RID2PASID failed\n");
2565                         dmar_remove_one_dev_info(dev);
2566                         return NULL;
2567                 }
2568         }
2569
2570         if (dev && domain_context_mapping(domain, dev)) {
2571                 dev_err(dev, "Domain context map failed\n");
2572                 dmar_remove_one_dev_info(dev);
2573                 return NULL;
2574         }
2575
2576         return domain;
2577 }
2578
2579 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2580 {
2581         *(u16 *)opaque = alias;
2582         return 0;
2583 }
2584
2585 static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
2586 {
2587         struct device_domain_info *info;
2588         struct dmar_domain *domain = NULL;
2589         struct intel_iommu *iommu;
2590         u16 dma_alias;
2591         unsigned long flags;
2592         u8 bus, devfn;
2593
2594         iommu = device_to_iommu(dev, &bus, &devfn);
2595         if (!iommu)
2596                 return NULL;
2597
2598         if (dev_is_pci(dev)) {
2599                 struct pci_dev *pdev = to_pci_dev(dev);
2600
2601                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2602
2603                 spin_lock_irqsave(&device_domain_lock, flags);
2604                 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2605                                                       PCI_BUS_NUM(dma_alias),
2606                                                       dma_alias & 0xff);
2607                 if (info) {
2608                         iommu = info->iommu;
2609                         domain = info->domain;
2610                 }
2611                 spin_unlock_irqrestore(&device_domain_lock, flags);
2612
2613                 /* DMA alias already has a domain, use it */
2614                 if (info)
2615                         goto out;
2616         }
2617
2618         /* Allocate and initialize new domain for the device */
2619         domain = alloc_domain(0);
2620         if (!domain)
2621                 return NULL;
2622         if (domain_init(domain, iommu, gaw)) {
2623                 domain_exit(domain);
2624                 return NULL;
2625         }
2626
2627 out:
2628
2629         return domain;
2630 }
2631
2632 static struct dmar_domain *set_domain_for_dev(struct device *dev,
2633                                               struct dmar_domain *domain)
2634 {
2635         struct intel_iommu *iommu;
2636         struct dmar_domain *tmp;
2637         u16 req_id, dma_alias;
2638         u8 bus, devfn;
2639
2640         iommu = device_to_iommu(dev, &bus, &devfn);
2641         if (!iommu)
2642                 return NULL;
2643
2644         req_id = ((u16)bus << 8) | devfn;
2645
2646         if (dev_is_pci(dev)) {
2647                 struct pci_dev *pdev = to_pci_dev(dev);
2648
2649                 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2650
2651                 /* register PCI DMA alias device */
2652                 if (req_id != dma_alias) {
2653                         tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2654                                         dma_alias & 0xff, NULL, domain);
2655
2656                         if (!tmp || tmp != domain)
2657                                 return tmp;
2658                 }
2659         }
2660
2661         tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2662         if (!tmp || tmp != domain)
2663                 return tmp;
2664
2665         return domain;
2666 }
2667
2668 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2669 {
2670         struct dmar_domain *domain, *tmp;
2671
2672         domain = find_domain(dev);
2673         if (domain)
2674                 goto out;
2675
2676         domain = find_or_alloc_domain(dev, gaw);
2677         if (!domain)
2678                 goto out;
2679
2680         tmp = set_domain_for_dev(dev, domain);
2681         if (!tmp || domain != tmp) {
2682                 domain_exit(domain);
2683                 domain = tmp;
2684         }
2685
2686 out:
2687
2688         return domain;
2689 }
2690
2691 static int iommu_domain_identity_map(struct dmar_domain *domain,
2692                                      unsigned long long start,
2693                                      unsigned long long end)
2694 {
2695         unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2696         unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2697
2698         if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2699                           dma_to_mm_pfn(last_vpfn))) {
2700                 pr_err("Reserving iova failed\n");
2701                 return -ENOMEM;
2702         }
2703
2704         pr_debug("Mapping reserved region %llx-%llx\n", start, end);
2705         /*
2706          * RMRR range might have overlap with physical memory range,
2707          * clear it first
2708          */
2709         dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2710
2711         return __domain_mapping(domain, first_vpfn, NULL,
2712                                 first_vpfn, last_vpfn - first_vpfn + 1,
2713                                 DMA_PTE_READ|DMA_PTE_WRITE);
2714 }
2715
2716 static int domain_prepare_identity_map(struct device *dev,
2717                                        struct dmar_domain *domain,
2718                                        unsigned long long start,
2719                                        unsigned long long end)
2720 {
2721         /* For _hardware_ passthrough, don't bother. But for software
2722            passthrough, we do it anyway -- it may indicate a memory
2723            range which is reserved in E820 and therefore didn't get set
2724            up in si_domain to start with. */
2725         if (domain == si_domain && hw_pass_through) {
2726                 dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n",
2727                          start, end);
2728                 return 0;
2729         }
2730
2731         dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end);
2732
2733         if (end < start) {
2734                 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2735                         "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2736                         dmi_get_system_info(DMI_BIOS_VENDOR),
2737                         dmi_get_system_info(DMI_BIOS_VERSION),
2738                         dmi_get_system_info(DMI_PRODUCT_VERSION));
2739                 return -EIO;
2740         }
2741
2742         if (end >> agaw_to_width(domain->agaw)) {
2743                 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2744                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2745                      agaw_to_width(domain->agaw),
2746                      dmi_get_system_info(DMI_BIOS_VENDOR),
2747                      dmi_get_system_info(DMI_BIOS_VERSION),
2748                      dmi_get_system_info(DMI_PRODUCT_VERSION));
2749                 return -EIO;
2750         }
2751
2752         return iommu_domain_identity_map(domain, start, end);
2753 }
2754
2755 static int iommu_prepare_identity_map(struct device *dev,
2756                                       unsigned long long start,
2757                                       unsigned long long end)
2758 {
2759         struct dmar_domain *domain;
2760         int ret;
2761
2762         domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2763         if (!domain)
2764                 return -ENOMEM;
2765
2766         ret = domain_prepare_identity_map(dev, domain, start, end);
2767         if (ret)
2768                 domain_exit(domain);
2769
2770         return ret;
2771 }
2772
2773 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2774                                          struct device *dev)
2775 {
2776         if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2777                 return 0;
2778         return iommu_prepare_identity_map(dev, rmrr->base_address,
2779                                           rmrr->end_address);
2780 }
2781
2782 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2783 static inline void iommu_prepare_isa(void)
2784 {
2785         struct pci_dev *pdev;
2786         int ret;
2787
2788         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2789         if (!pdev)
2790                 return;
2791
2792         pr_info("Prepare 0-16MiB unity mapping for LPC\n");
2793         ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2794
2795         if (ret)
2796                 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
2797
2798         pci_dev_put(pdev);
2799 }
2800 #else
2801 static inline void iommu_prepare_isa(void)
2802 {
2803         return;
2804 }
2805 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
2806
2807 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2808
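     /*
      * Allocate the static identity (si) domain used for identity-mapped
      * devices.  With hardware pass-through (hw != 0) no mappings need to
      * be built; otherwise every usable physical memory range of every
      * online node is identity-mapped into the domain.
      */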
2809 static int __init si_domain_init(int hw)
2810 {
2811         int nid, ret;
2812
2813         si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2814         if (!si_domain)
2815                 return -EFAULT;
2816
2817         if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2818                 domain_exit(si_domain);
2819                 return -EFAULT;
2820         }
2821
2822         pr_debug("Identity mapping domain allocated\n");
2823
2824         if (hw)
2825                 return 0;
2826
2827         for_each_online_node(nid) {
2828                 unsigned long start_pfn, end_pfn;
2829                 int i;
2830
2831                 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2832                         ret = iommu_domain_identity_map(si_domain,
2833                                         PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2834                         if (ret)
2835                                 return ret;
2836                 }
2837         }
2838
2839         return 0;
2840 }
2841
2842 static int identity_mapping(struct device *dev)
2843 {
2844         struct device_domain_info *info;
2845
2846         if (likely(!iommu_identity_mapping))
2847                 return 0;
2848
2849         info = dev->archdata.iommu;
2850         if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2851                 return (info->domain == si_domain);
2852
2853         return 0;
2854 }
2855
2856 static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2857 {
2858         struct dmar_domain *ndomain;
2859         struct intel_iommu *iommu;
2860         u8 bus, devfn;
2861
2862         iommu = device_to_iommu(dev, &bus, &devfn);
2863         if (!iommu)
2864                 return -ENODEV;
2865
2866         ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
2867         if (ndomain != domain)
2868                 return -EBUSY;
2869
2870         return 0;
2871 }
2872
2873 static bool device_has_rmrr(struct device *dev)
2874 {
2875         struct dmar_rmrr_unit *rmrr;
2876         struct device *tmp;
2877         int i;
2878
2879         rcu_read_lock();
2880         for_each_rmrr_units(rmrr) {
2881                 /*
2882                  * Return TRUE if this RMRR contains the device that
2883                  * is passed in.
2884                  */
2885                 for_each_active_dev_scope(rmrr->devices,
2886                                           rmrr->devices_cnt, i, tmp)
2887                         if (tmp == dev) {
2888                                 rcu_read_unlock();
2889                                 return true;
2890                         }
2891         }
2892         rcu_read_unlock();
2893         return false;
2894 }
2895
2896 /*
2897  * There are a couple cases where we need to restrict the functionality of
2898  * devices associated with RMRRs.  The first is when evaluating a device for
2899  * identity mapping because problems exist when devices are moved in and out
2900  * of domains and their respective RMRR information is lost.  This means that
2901  * a device with associated RMRRs will never be in a "passthrough" domain.
2902  * The second is use of the device through the IOMMU API.  This interface
2903  * expects to have full control of the IOVA space for the device.  We cannot
2904  * satisfy both the requirement that RMRR access is maintained and have an
2905  * unencumbered IOVA space.  We also have no ability to quiesce the device's
2906  * use of the RMRR space or even inform the IOMMU API user of the restriction.
2907  * We therefore prevent devices associated with an RMRR from participating in
2908  * the IOMMU API, which eliminates them from device assignment.
2909  *
2910  * In both cases we assume that PCI USB devices with RMRRs have them largely
2911  * for historical reasons and that the RMRR space is not actively used post
2912  * boot.  This exclusion may change if vendors begin to abuse it.
2913  *
2914  * The same exception is made for graphics devices, with the requirement that
2915  * any use of the RMRR regions will be torn down before assigning the device
2916  * to a guest.
2917  */
2918 static bool device_is_rmrr_locked(struct device *dev)
2919 {
2920         if (!device_has_rmrr(dev))
2921                 return false;
2922
2923         if (dev_is_pci(dev)) {
2924                 struct pci_dev *pdev = to_pci_dev(dev);
2925
2926                 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
2927                         return false;
2928         }
2929
2930         return true;
2931 }
2932
2933 static int iommu_should_identity_map(struct device *dev, int startup)
2934 {
2935         if (dev_is_pci(dev)) {
2936                 struct pci_dev *pdev = to_pci_dev(dev);
2937
2938                 if (device_is_rmrr_locked(dev))
2939                         return 0;
2940
2941                 /*
2942                  * Prevent any device marked as untrusted from getting
2943                  * placed into the static identity mapping domain.
2944                  */
2945                 if (pdev->untrusted)
2946                         return 0;
2947
2948                 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2949                         return 1;
2950
2951                 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2952                         return 1;
2953
2954                 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2955                         return 0;
2956
2957                 /*
2958                  * We want to start off with all devices in the 1:1 domain, and
2959                  * take them out later if we find they can't access all of memory.
2960                  *
2961                  * However, we can't do this for PCI devices behind bridges,
2962                  * because all PCI devices behind the same bridge will end up
2963                  * with the same source-id on their transactions.
2964                  *
2965                  * Practically speaking, we can't change things around for these
2966                  * devices at run-time, because we can't be sure there'll be no
2967                  * DMA transactions in flight for any of their siblings.
2968                  *
2969                  * So PCI devices (unless they're on the root bus) as well as
2970                  * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2971                  * the 1:1 domain, just in _case_ one of their siblings turns out
2972                  * not to be able to map all of memory.
2973                  */
2974                 if (!pci_is_pcie(pdev)) {
2975                         if (!pci_is_root_bus(pdev->bus))
2976                                 return 0;
2977                         if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2978                                 return 0;
2979                 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2980                         return 0;
2981         } else {
2982                 if (device_has_rmrr(dev))
2983                         return 0;
2984         }
2985
2986         /*
2987          * At boot time, we don't yet know if devices will be 64-bit capable.
2988          * Assume that they will -- if they turn out not to be, then we can
2989          * take them out of the 1:1 domain later.
2990          */
2991         if (!startup) {
2992                 /*
2993                  * If the device's dma_mask is less than the system's memory
2994                  * size then this is not a candidate for identity mapping.
2995                  */
2996                 u64 dma_mask = *dev->dma_mask;
2997
2998                 if (dev->coherent_dma_mask &&
2999                     dev->coherent_dma_mask < dma_mask)
3000                         dma_mask = dev->coherent_dma_mask;
3001
3002                 return dma_mask >= dma_get_required_mask(dev);
3003         }
3004
3005         return 1;
3006 }
3007
3008 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
3009 {
3010         int ret;
3011
3012         if (!iommu_should_identity_map(dev, 1))
3013                 return 0;
3014
3015         ret = domain_add_dev_info(si_domain, dev);
3016         if (!ret)
3017                 dev_info(dev, "%s identity mapping\n",
3018                          hw ? "Hardware" : "Software");
3019         else if (ret == -ENODEV)
3020                 /* device not associated with an iommu */
3021                 ret = 0;
3022
3023         return ret;
3024 }
3025
3026
3027 static int __init iommu_prepare_static_identity_mapping(int hw)
3028 {
3029         struct pci_dev *pdev = NULL;
3030         struct dmar_drhd_unit *drhd;
3031         struct intel_iommu *iommu;
3032         struct device *dev;
3033         int i;
3034         int ret = 0;
3035
3036         for_each_pci_dev(pdev) {
3037                 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
3038                 if (ret)
3039                         return ret;
3040         }
3041
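             /*
              * Also cover devices that are only reachable through ACPI
              * device-scope entries: for each ACPI companion in a DRHD
              * device scope, walk its physical nodes and prepare those
              * for identity mapping as well.
              */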
3042         for_each_active_iommu(iommu, drhd)
3043                 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
3044                         struct acpi_device_physical_node *pn;
3045                         struct acpi_device *adev;
3046
3047                         if (dev->bus != &acpi_bus_type)
3048                                 continue;
3049
3050                         adev = to_acpi_device(dev);
3051                         mutex_lock(&adev->physical_node_lock);
3052                         list_for_each_entry(pn, &adev->physical_node_list, node) {
3053                                 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
3054                                 if (ret)
3055                                         break;
3056                         }
3057                         mutex_unlock(&adev->physical_node_lock);
3058                         if (ret)
3059                                 return ret;
3060                 }
3061
3062         return 0;
3063 }
3064
3065 static void intel_iommu_init_qi(struct intel_iommu *iommu)
3066 {
3067         /*
3068          * Start from a sane IOMMU hardware state.
3069          * If queued invalidation was already initialized by us (for
3070          * example, while enabling interrupt remapping), then things
3071          * are already rolling from a sane state.
3072          */
3073         if (!iommu->qi) {
3074                 /*
3075                  * Clear any previous faults.
3076                  */
3077                 dmar_fault(-1, iommu);
3078                 /*
3079                  * Disable queued invalidation if supported and already enabled
3080                  * before OS handover.
3081                  */
3082                 dmar_disable_qi(iommu);
3083         }
3084
3085         if (dmar_enable_qi(iommu)) {
3086                 /*
3087                  * Queued invalidation could not be enabled; fall back to register-based invalidation
3088                  */
3089                 iommu->flush.flush_context = __iommu_flush_context;
3090                 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
3091                 pr_info("%s: Using Register based invalidation\n",
3092                         iommu->name);
3093         } else {
3094                 iommu->flush.flush_context = qi_flush_context;
3095                 iommu->flush.flush_iotlb = qi_flush_iotlb;
3096                 pr_info("%s: Using Queued invalidation\n", iommu->name);
3097         }
3098 }
3099
3100 static int copy_context_table(struct intel_iommu *iommu,
3101                               struct root_entry *old_re,
3102                               struct context_entry **tbl,
3103                               int bus, bool ext)
3104 {
3105         int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
3106         struct context_entry *new_ce = NULL, ce;
3107         struct context_entry *old_ce = NULL;
3108         struct root_entry re;
3109         phys_addr_t old_ce_phys;
3110
3111         tbl_idx = ext ? bus * 2 : bus;
3112         memcpy(&re, old_re, sizeof(re));
3113
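             /*
              * In the extended root-entry format each bus has two context
              * tables (LCTP covers devfn 0x00-0x7f, UCTP covers 0x80-0xff)
              * and each extended context entry occupies two legacy-sized
              * slots, hence the bus * 2 / devfn * 2 indexing and the two
              * tbl[] slots per bus.
              */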
3114         for (devfn = 0; devfn < 256; devfn++) {
3115                 /* First calculate the correct index */
3116                 idx = (ext ? devfn * 2 : devfn) % 256;
3117
3118                 if (idx == 0) {
3119                         /* First save what we may have and clean up */
3120                         if (new_ce) {
3121                                 tbl[tbl_idx] = new_ce;
3122                                 __iommu_flush_cache(iommu, new_ce,
3123                                                     VTD_PAGE_SIZE);
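                                     /*
                                      * The lower table is stored here, so
                                      * the final store after the loop goes
                                      * into the tbl_idx + 1 slot.
                                      */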
3124                                 pos = 1;
3125                         }
3126
3127                         if (old_ce)
3128                                 memunmap(old_ce);
3129
3130                         ret = 0;
3131                         if (devfn < 0x80)
3132                                 old_ce_phys = root_entry_lctp(&re);
3133                         else
3134                                 old_ce_phys = root_entry_uctp(&re);
3135
3136                         if (!old_ce_phys) {
3137                                 if (ext && devfn == 0) {
3138                                         /* No LCTP, try UCTP */
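                                             /* (the loop increment advances devfn to 0x80) */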
3139                                         devfn = 0x7f;
3140                                         continue;
3141                                 } else {
3142                                         goto out;
3143                                 }
3144                         }
3145
3146                         ret = -ENOMEM;
3147                         old_ce = memremap(old_ce_phys, PAGE_SIZE,
3148                                         MEMREMAP_WB);
3149                         if (!old_ce)
3150                                 goto out;
3151
3152                         new_ce = alloc_pgtable_page(iommu->node);
3153                         if (!new_ce)
3154                                 goto out_unmap;
3155
3156                         ret = 0;
3157                 }
3158
3159                 /* Now copy the context entry */
3160                 memcpy(&ce, old_ce + idx, sizeof(ce));
3161
3162                 if (!__context_present(&ce))
3163                         continue;
3164
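                     /*
                      * Reserve the domain-id the old kernel used so that new
                      * domain allocations in this kernel cannot collide with
                      * translations still live behind the copied entry.
                      */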
3165                 did = context_domain_id(&ce);
3166                 if (did >= 0 && did < cap_ndoms(iommu->cap))
3167                         set_bit(did, iommu->domain_ids);
3168
3169                 /*
3170                  * We need a marker for copied context entries. This
3171                  * marker needs to work for the old format as well as
3172                  * for extended context entries.
3173                  *
3174                  * Bit 67 of the context entry is used. In the old
3175                  * format this bit is available to software, in the
3176                  * extended format it is the PGE bit, but PGE is ignored
3177                  * by HW if PASIDs are disabled (and thus still
3178                  * available).
3179                  *
3180                  * So disable PASIDs first and then mark the entry
3181                  * copied. This means that we don't copy PASID
3182                  * translations from the old kernel, but this is fine as
3183                  * faults there are not fatal.
3184                  */
3185                 context_clear_pasid_enable(&ce);
3186                 context_set_copied(&ce);
3187
3188                 new_ce[idx] = ce;
3189         }
3190
3191         tbl[tbl_idx + pos] = new_ce;
3192
3193         __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
3194
3195 out_unmap:
3196         memunmap(old_ce);
3197
3198 out:
3199         return ret;
3200 }
3201
3202 static int copy_translation_tables(struct intel_iommu *iommu)
3203 {
3204         struct context_entry **ctxt_tbls;
3205         struct root_entry *old_rt;
3206         phys_addr_t old_rt_phys;
3207         int ctxt_table_entries;
3208         unsigned long flags;
3209         u64 rtaddr_reg;
3210         int bus, ret;
3211         bool new_ext, ext;
3212
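             /*
              * 'ext' reflects the root-table format the old kernel programmed
              * (the RTT bit of the live root-table address register), while
              * 'new_ext' is the format this kernel would use, based on the
              * extended-context capability.
              */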
3213         rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
3214         ext        = !!(rtaddr_reg & DMA_RTADDR_RTT);
3215         new_ext    = !!ecap_ecs(iommu->ecap);
3216
3217         /*
3218          * The RTT bit can only be changed when translation is disabled,
3219          * but disabling translation means to open a window for data
3220          * corruption. So bail out and don't copy anything if we would
3221          * have to change the bit.
3222          */
3223         if (new_ext != ext)
3224                 return -EINVAL;
3225
3226         old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
3227         if (!old_rt_phys)
3228                 return -EINVAL;
3229
3230         old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
3231         if (!old_rt)
3232                 return -ENOMEM;
3233
3234         /* This is too big for the stack - allocate it from slab */
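             /* Two context tables per bus when the extended format is in use. */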
3235         ctxt_table_entries = ext ? 512 : 256;
3236         ret = -ENOMEM;
3237         ctxt_tbls = kcalloc(ctxt_table_entries, sizeof(void *), GFP_KERNEL);
3238         if (!ctxt_tbls)
3239                 goto out_unmap;
3240
3241         for (bus = 0; bus < 256; bus++) {
3242                 ret = copy_context_table(iommu, &old_rt[bus],
3243                                          ctxt_tbls, bus, ext);
3244                 if (ret) {
3245                         pr_err("%s: Failed to copy context table for bus %d\n",
3246                                 iommu->name, bus);
3247                         continue;
3248                 }
3249         }
3250
3251         spin_lock_irqsave(&iommu->lock, flags);
3252
3253         /* Context tables are copied; now write them into the root_entry table */
3254         for (bus = 0; bus < 256; bus++) {
3255                 int idx = ext ? bus * 2 : bus;
3256                 u64 val;
3257
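                     /* OR-ing in bit 0 marks the context-table pointer present. */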
3258                 if (ctxt_tbls[idx]) {
3259                         val = virt_to_phys(ctxt_tbls[idx]) | 1;
3260                         iommu->root_entry[bus].lo = val;
3261                 }
3262
3263                 if (!ext || !ctxt_tbls[idx + 1])
3264                         continue;
3265
3266                 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
3267                 iommu->root_entry[bus].hi = val;
3268         }
3269
3270         spin_unlock_irqrestore(&iommu->lock, flags);
3271
3272         kfree(ctxt_tbls);
3273
3274         __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
3275
3276         ret = 0;
3277
3278 out_unmap:
3279         memunmap(old_rt);
3280
3281         return ret;
3282 }
3283
3284 static int __init init_dmars(void)
3285 {
3286         struct dmar_drhd_unit *drhd;
3287         struct dmar_rmrr_unit *rmrr;
3288         bool copied_tables = false;
3289         struct device *dev;
3290         struct intel_iommu *iommu;
3291         int i, ret;
3292
3293         /*
3294          * for each drhd
3295          *    allocate root
3296          *    initialize and program root entry to not present
3297          * endfor
3298          */
3299         for_each_drhd_unit(drhd) {
3300                 /*
3301                  * No lock needed: this is only incremented in the
3302                  * single-threaded kernel __init code path; all other
3303                  * accesses are read-only.
3304                  */
3305                 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
3306                         g_num_of_iommus++;
3307                         continue;
3308                 }
3309                 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
3310         }
3311
3312         /* Preallocate enough resources for IOMMU hot-addition */
3313         if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3314                 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3315
3316         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3317                         GFP_KERNEL);
3318         if (!g_iommus) {
3319                 pr_err("Allocating global iommu array failed\n");
3320                 ret = -ENOMEM;
3321                 goto error;
3322         }
3323
3324         for_each_active_iommu(iommu, drhd) {
3325                 /*
3326                  * Find the smallest maximum PASID size supported by any
3327                  * IOMMU in the system; the system-wide PASID table must
3328                  * be no bigger than the smallest supported limit.
3329                  */
3330                 if (pasid_supported(iommu)) {
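                             /*
                              * ECAP.PSS reports the supported PASID width
                              * minus one, so 2 << PSS is the number of
                              * PASIDs the hardware can handle.
                              */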
3331                         u32 temp = 2 << ecap_pss(iommu->ecap);
3332
3333                         intel_pasid_max_id = min_t(u32, temp,
3334                                                    intel_pasid_max_id);
3335                 }
3336
3337                 g_iommus[iommu->seq_id] = iommu;
3338
3339                 intel_iommu_init_qi(iommu);
3340
3341                 ret = iommu_init_domains(iommu);
3342                 if (ret)
3343                         goto free_iommu;
3344
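                     /* Record whether firmware or a previous kernel left translation enabled. */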
3345                 init_translation_status(iommu);
3346
3347                 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3348                         iommu_disable_translation(iommu);
3349                         clear_translation_pre_enabled(iommu);
3350                         pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3351                                 iommu->name);
3352                 }
3353
3354                 /*
3355                  * TBD:
3356                  * we could share the same root & context tables
3357                  * among all IOMMUs; this needs to be split out later.
3358                  */
3359                 ret = iommu_alloc_root_entry(iommu);
3360                 if (ret)
3361                         goto free_iommu;
3362
3363                 if (translation_pre_enabled(iommu)) {
3364                         pr_info("Translation already enabled - trying to copy translation structures\n");
3365
3366                         ret = copy_translation_tables(iommu);