linux-2.6-omap-h63xx.git / drivers / pci / intel-iommu.c
Commit: Get iommu from g_iommus for deferred flush
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  * Author: Fenghua Yu <fenghua.yu@intel.com>
22  */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/intel-iommu.h>
38 #include <asm/cacheflush.h>
39 #include <asm/iommu.h>
40 #include "pci.h"
41
42 #define ROOT_SIZE               VTD_PAGE_SIZE
43 #define CONTEXT_SIZE            VTD_PAGE_SIZE
44
45 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48 #define IOAPIC_RANGE_START      (0xfee00000)
49 #define IOAPIC_RANGE_END        (0xfeefffff)
50 #define IOVA_START_ADDR         (0x1000)
51
52 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56 #define IOVA_PFN(addr)          ((addr) >> PAGE_SHIFT)
57 #define DMA_32BIT_PFN           IOVA_PFN(DMA_32BIT_MASK)
58 #define DMA_64BIT_PFN           IOVA_PFN(DMA_64BIT_MASK)
59
60 /* global iommu list, set NULL for ignored DMAR units */
61 static struct intel_iommu **g_iommus;
62
63 /*
64  * 0: Present
65  * 1-11: Reserved
66  * 12-63: Context Ptr (12 - (haw-1))
67  * 64-127: Reserved
68  */
69 struct root_entry {
70         u64     val;
71         u64     rsvd1;
72 };
73 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
74 static inline bool root_present(struct root_entry *root)
75 {
76         return (root->val & 1);
77 }
78 static inline void set_root_present(struct root_entry *root)
79 {
80         root->val |= 1;
81 }
82 static inline void set_root_value(struct root_entry *root, unsigned long value)
83 {
84         root->val |= value & VTD_PAGE_MASK;
85 }
86
87 static inline struct context_entry *
88 get_context_addr_from_root(struct root_entry *root)
89 {
90         return (struct context_entry *)
91                 (root_present(root)?phys_to_virt(
92                 root->val & VTD_PAGE_MASK) :
93                 NULL);
94 }
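/*
 * Illustrative walk (hypothetical device, not taken from this file): for a
 * device at 00:1f.2, bus = 0 and devfn = (0x1f << 3) | 2 = 0xfa.  The
 * hardware, like device_to_context_entry() below, first indexes the
 * 256-entry root table with the bus number, follows the context-table
 * pointer stored in that root entry, and then indexes the 256-entry context
 * table with devfn:
 *
 *	root_entry[0x00]  ->  context table  ->  context_entry[0xfa]
 */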
95
96 /*
97  * low 64 bits:
98  * 0: present
99  * 1: fault processing disable
100  * 2-3: translation type
101  * 12-63: address space root
102  * high 64 bits:
103  * 0-2: address width
104  * 3-6: aval
105  * 8-23: domain id
106  */
107 struct context_entry {
108         u64 lo;
109         u64 hi;
110 };
111
112 static inline bool context_present(struct context_entry *context)
113 {
114         return (context->lo & 1);
115 }
116 static inline void context_set_present(struct context_entry *context)
117 {
118         context->lo |= 1;
119 }
120
121 static inline void context_set_fault_enable(struct context_entry *context)
122 {
123         context->lo &= (((u64)-1) << 2) | 1;
124 }
125
126 #define CONTEXT_TT_MULTI_LEVEL 0
127
128 static inline void context_set_translation_type(struct context_entry *context,
129                                                 unsigned long value)
130 {
131         context->lo &= (((u64)-1) << 4) | 3;
132         context->lo |= (value & 3) << 2;
133 }
134
135 static inline void context_set_address_root(struct context_entry *context,
136                                             unsigned long value)
137 {
138         context->lo |= value & VTD_PAGE_MASK;
139 }
140
141 static inline void context_set_address_width(struct context_entry *context,
142                                              unsigned long value)
143 {
144         context->hi |= value & 7;
145 }
146
147 static inline void context_set_domain_id(struct context_entry *context,
148                                          unsigned long value)
149 {
150         context->hi |= (value & ((1 << 16) - 1)) << 8;
151 }
152
153 static inline void context_clear_entry(struct context_entry *context)
154 {
155         context->lo = 0;
156         context->hi = 0;
157 }
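/*
 * Example of how the setters above compose a context entry (hypothetical
 * values, following the sequence used in domain_context_mapping_one()
 * further down): for domain id 5, agaw 2 and a page-table root at physical
 * address 0x12345000,
 *
 *	context_set_domain_id(c, 5);       hi |= 5 << 8            -> hi = 0x500
 *	context_set_address_width(c, 2);   hi |= 2                 -> hi = 0x502
 *	context_set_address_root(c, 0x12345000);                   -> lo = 0x12345000
 *	context_set_translation_type(c, CONTEXT_TT_MULTI_LEVEL);      (type 0)
 *	context_set_present(c);            lo |= 1                 -> lo = 0x12345001
 */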
158
159 /*
160  * 0: readable
161  * 1: writable
162  * 2-6: reserved
163  * 7: super page
164  * 8-11: available
165  * 12-63: Host physical address
166  */
167 struct dma_pte {
168         u64 val;
169 };
170
171 static inline void dma_clear_pte(struct dma_pte *pte)
172 {
173         pte->val = 0;
174 }
175
176 static inline void dma_set_pte_readable(struct dma_pte *pte)
177 {
178         pte->val |= DMA_PTE_READ;
179 }
180
181 static inline void dma_set_pte_writable(struct dma_pte *pte)
182 {
183         pte->val |= DMA_PTE_WRITE;
184 }
185
186 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
187 {
188         pte->val = (pte->val & ~3) | (prot & 3);
189 }
190
191 static inline u64 dma_pte_addr(struct dma_pte *pte)
192 {
193         return (pte->val & VTD_PAGE_MASK);
194 }
195
196 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
197 {
198         pte->val |= (addr & VTD_PAGE_MASK);
199 }
200
201 static inline bool dma_pte_present(struct dma_pte *pte)
202 {
203         return (pte->val & 3) != 0;
204 }
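/*
 * A leaf PTE built with the helpers above (hypothetical frame address):
 *
 *	dma_set_pte_addr(&pte, 0x80000000ULL);                 pte.val = 0x80000000
 *	dma_set_pte_prot(&pte, DMA_PTE_READ | DMA_PTE_WRITE);  pte.val = 0x80000003
 *
 * dma_pte_present() only tests the R/W bits, so a PTE counts as present as
 * soon as either permission is set.
 */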
205
206 /* devices under the same p2p bridge are owned in one domain */
207 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
208
209 struct dmar_domain {
210         int     id;                     /* domain id */
211         struct intel_iommu *iommu;      /* back pointer to owning iommu */
212
213         struct list_head devices;       /* all devices' list */
214         struct iova_domain iovad;       /* iova's that belong to this domain */
215
216         struct dma_pte  *pgd;           /* virtual address */
217         spinlock_t      mapping_lock;   /* page table lock */
218         int             gaw;            /* max guest address width */
219
220         /* adjusted guest address width, 0 is level 2 30-bit */
221         int             agaw;
222
223         int             flags;          /* flags to find out type of domain */
224 };
225
226 /* PCI domain-device relationship */
227 struct device_domain_info {
228         struct list_head link;  /* link to domain siblings */
229         struct list_head global; /* link to global list */
230         u8 bus;                 /* PCI bus number */
231         u8 devfn;               /* PCI devfn number */
232         struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
233         struct dmar_domain *domain; /* pointer to domain */
234 };
235
236 static void flush_unmaps_timeout(unsigned long data);
237
238 DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);
239
240 #define HIGH_WATER_MARK 250
241 struct deferred_flush_tables {
242         int next;
243         struct iova *iova[HIGH_WATER_MARK];
244         struct dmar_domain *domain[HIGH_WATER_MARK];
245 };
246
247 static struct deferred_flush_tables *deferred_flush;
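/*
 * Rough sketch of the deferred-flush path (the details live further down in
 * this file): instead of flushing the IOTLB on every unmap, unmapped IOVAs
 * are queued per IOMMU in deferred_flush[], at most HIGH_WATER_MARK entries
 * per table, and drained later via unmap_timer/flush_unmaps_timeout().
 * Booting with intel_iommu=strict (see intel_iommu_setup() below) bypasses
 * the batching and flushes on every unmap.
 */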
248
249 /* bitmap for indexing intel_iommus */
250 static int g_num_of_iommus;
251
252 static DEFINE_SPINLOCK(async_umap_flush_lock);
253 static LIST_HEAD(unmaps_to_do);
254
255 static int timer_on;
256 static long list_size;
257
258 static void domain_remove_dev_info(struct dmar_domain *domain);
259
260 int dmar_disabled;
261 static int __initdata dmar_map_gfx = 1;
262 static int dmar_forcedac;
263 static int intel_iommu_strict;
264
265 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
266 static DEFINE_SPINLOCK(device_domain_lock);
267 static LIST_HEAD(device_domain_list);
268
269 static int __init intel_iommu_setup(char *str)
270 {
271         if (!str)
272                 return -EINVAL;
273         while (*str) {
274                 if (!strncmp(str, "off", 3)) {
275                         dmar_disabled = 1;
276                         printk(KERN_INFO"Intel-IOMMU: disabled\n");
277                 } else if (!strncmp(str, "igfx_off", 8)) {
278                         dmar_map_gfx = 0;
279                         printk(KERN_INFO
280                                 "Intel-IOMMU: disable GFX device mapping\n");
281                 } else if (!strncmp(str, "forcedac", 8)) {
282                         printk(KERN_INFO
283                                 "Intel-IOMMU: Forcing DAC for PCI devices\n");
284                         dmar_forcedac = 1;
285                 } else if (!strncmp(str, "strict", 6)) {
286                         printk(KERN_INFO
287                                 "Intel-IOMMU: disable batched IOTLB flush\n");
288                         intel_iommu_strict = 1;
289                 }
290
291                 str += strcspn(str, ",");
292                 while (*str == ',')
293                         str++;
294         }
295         return 0;
296 }
297 __setup("intel_iommu=", intel_iommu_setup);
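/*
 * Boot-parameter usage example (options may be combined, comma separated):
 *
 *	intel_iommu=off              disable the IOMMU driver entirely
 *	intel_iommu=igfx_off,strict  leave graphics devices unmapped and flush
 *	                             the IOTLB on every unmap (no batching)
 */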
298
299 static struct kmem_cache *iommu_domain_cache;
300 static struct kmem_cache *iommu_devinfo_cache;
301 static struct kmem_cache *iommu_iova_cache;
302
303 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
304 {
305         unsigned int flags;
306         void *vaddr;
307
308         /* trying to avoid low memory issues */
309         flags = current->flags & PF_MEMALLOC;
310         current->flags |= PF_MEMALLOC;
311         vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
312         current->flags &= (~PF_MEMALLOC | flags);
313         return vaddr;
314 }
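/*
 * The PF_MEMALLOC dance above (also used by alloc_pgtable_page() below) lets
 * these GFP_ATOMIC allocations dip into the emergency reserves.  The restore
 * expression is subtle but correct: 'flags' holds the caller's original
 * PF_MEMALLOC bit, so (~PF_MEMALLOC | flags) is all ones when the bit was
 * already set (no change) and ~PF_MEMALLOC when it was not (bit cleared).
 */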
315
316
317 static inline void *alloc_pgtable_page(void)
318 {
319         unsigned int flags;
320         void *vaddr;
321
322         /* trying to avoid low memory issues */
323         flags = current->flags & PF_MEMALLOC;
324         current->flags |= PF_MEMALLOC;
325         vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
326         current->flags &= (~PF_MEMALLOC | flags);
327         return vaddr;
328 }
329
330 static inline void free_pgtable_page(void *vaddr)
331 {
332         free_page((unsigned long)vaddr);
333 }
334
335 static inline void *alloc_domain_mem(void)
336 {
337         return iommu_kmem_cache_alloc(iommu_domain_cache);
338 }
339
340 static void free_domain_mem(void *vaddr)
341 {
342         kmem_cache_free(iommu_domain_cache, vaddr);
343 }
344
345 static inline void * alloc_devinfo_mem(void)
346 {
347         return iommu_kmem_cache_alloc(iommu_devinfo_cache);
348 }
349
350 static inline void free_devinfo_mem(void *vaddr)
351 {
352         kmem_cache_free(iommu_devinfo_cache, vaddr);
353 }
354
355 struct iova *alloc_iova_mem(void)
356 {
357         return iommu_kmem_cache_alloc(iommu_iova_cache);
358 }
359
360 void free_iova_mem(struct iova *iova)
361 {
362         kmem_cache_free(iommu_iova_cache, iova);
363 }
364
365 /* Gets context entry for a given bus and devfn */
366 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
367                 u8 bus, u8 devfn)
368 {
369         struct root_entry *root;
370         struct context_entry *context;
371         unsigned long phy_addr;
372         unsigned long flags;
373
374         spin_lock_irqsave(&iommu->lock, flags);
375         root = &iommu->root_entry[bus];
376         context = get_context_addr_from_root(root);
377         if (!context) {
378                 context = (struct context_entry *)alloc_pgtable_page();
379                 if (!context) {
380                         spin_unlock_irqrestore(&iommu->lock, flags);
381                         return NULL;
382                 }
383                 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
384                 phy_addr = virt_to_phys((void *)context);
385                 set_root_value(root, phy_addr);
386                 set_root_present(root);
387                 __iommu_flush_cache(iommu, root, sizeof(*root));
388         }
389         spin_unlock_irqrestore(&iommu->lock, flags);
390         return &context[devfn];
391 }
392
393 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
394 {
395         struct root_entry *root;
396         struct context_entry *context;
397         int ret;
398         unsigned long flags;
399
400         spin_lock_irqsave(&iommu->lock, flags);
401         root = &iommu->root_entry[bus];
402         context = get_context_addr_from_root(root);
403         if (!context) {
404                 ret = 0;
405                 goto out;
406         }
407         ret = context_present(&context[devfn]);
408 out:
409         spin_unlock_irqrestore(&iommu->lock, flags);
410         return ret;
411 }
412
413 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
414 {
415         struct root_entry *root;
416         struct context_entry *context;
417         unsigned long flags;
418
419         spin_lock_irqsave(&iommu->lock, flags);
420         root = &iommu->root_entry[bus];
421         context = get_context_addr_from_root(root);
422         if (context) {
423                 context_clear_entry(&context[devfn]);
424                 __iommu_flush_cache(iommu, &context[devfn], \
425                         sizeof(*context));
426         }
427         spin_unlock_irqrestore(&iommu->lock, flags);
428 }
429
430 static void free_context_table(struct intel_iommu *iommu)
431 {
432         struct root_entry *root;
433         int i;
434         unsigned long flags;
435         struct context_entry *context;
436
437         spin_lock_irqsave(&iommu->lock, flags);
438         if (!iommu->root_entry) {
439                 goto out;
440         }
441         for (i = 0; i < ROOT_ENTRY_NR; i++) {
442                 root = &iommu->root_entry[i];
443                 context = get_context_addr_from_root(root);
444                 if (context)
445                         free_pgtable_page(context);
446         }
447         free_pgtable_page(iommu->root_entry);
448         iommu->root_entry = NULL;
449 out:
450         spin_unlock_irqrestore(&iommu->lock, flags);
451 }
452
453 /* page table handling */
454 #define LEVEL_STRIDE            (9)
455 #define LEVEL_MASK              (((u64)1 << LEVEL_STRIDE) - 1)
456
457 static inline int agaw_to_level(int agaw)
458 {
459         return agaw + 2;
460 }
461
462 static inline int agaw_to_width(int agaw)
463 {
464         return 30 + agaw * LEVEL_STRIDE;
465
466 }
467
468 static inline int width_to_agaw(int width)
469 {
470         return (width - 30) / LEVEL_STRIDE;
471 }
472
473 static inline unsigned int level_to_offset_bits(int level)
474 {
475         return (12 + (level - 1) * LEVEL_STRIDE);
476 }
477
478 static inline int address_level_offset(u64 addr, int level)
479 {
480         return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
481 }
482
483 static inline u64 level_mask(int level)
484 {
485         return ((u64)-1 << level_to_offset_bits(level));
486 }
487
488 static inline u64 level_size(int level)
489 {
490         return ((u64)1 << level_to_offset_bits(level));
491 }
492
493 static inline u64 align_to_level(u64 addr, int level)
494 {
495         return ((addr + level_size(level) - 1) & level_mask(level));
496 }
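/*
 * Worked example of the helpers above for the default 48-bit domain width:
 * width_to_agaw(48) = 2 and agaw_to_level(2) = 4, so the page table has four
 * levels and a 48-bit address decomposes as
 *
 *	bits 47-39  level 4 index   (level_to_offset_bits(4) = 39)
 *	bits 38-30  level 3 index
 *	bits 29-21  level 2 index
 *	bits 20-12  level 1 index   (leaf PTE)
 *	bits 11-0   offset within the 4K page
 */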
497
498 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
499 {
500         int addr_width = agaw_to_width(domain->agaw);
501         struct dma_pte *parent, *pte = NULL;
502         int level = agaw_to_level(domain->agaw);
503         int offset;
504         unsigned long flags;
505
506         BUG_ON(!domain->pgd);
507
508         addr &= (((u64)1) << addr_width) - 1;
509         parent = domain->pgd;
510
511         spin_lock_irqsave(&domain->mapping_lock, flags);
512         while (level > 0) {
513                 void *tmp_page;
514
515                 offset = address_level_offset(addr, level);
516                 pte = &parent[offset];
517                 if (level == 1)
518                         break;
519
520                 if (!dma_pte_present(pte)) {
521                         tmp_page = alloc_pgtable_page();
522
523                         if (!tmp_page) {
524                                 spin_unlock_irqrestore(&domain->mapping_lock,
525                                         flags);
526                                 return NULL;
527                         }
528                         __iommu_flush_cache(domain->iommu, tmp_page,
529                                         PAGE_SIZE);
530                         dma_set_pte_addr(pte, virt_to_phys(tmp_page));
531                         /*
532                          * high level table always sets r/w, last level page
533                          * table control read/write
534                          */
535                         dma_set_pte_readable(pte);
536                         dma_set_pte_writable(pte);
537                         __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
538                 }
539                 parent = phys_to_virt(dma_pte_addr(pte));
540                 level--;
541         }
542
543         spin_unlock_irqrestore(&domain->mapping_lock, flags);
544         return pte;
545 }
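/*
 * addr_to_dma_pte() is the allocating walk: intermediate directories that do
 * not exist yet are created on the way down (with R/W set, since only the
 * leaf level enforces permissions) and the level-1 PTE for 'addr' is
 * returned.  dma_addr_level_pte() below is the non-allocating counterpart
 * used when clearing or freeing existing mappings.
 */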
546
547 /* return address's pte at specific level */
548 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
549                 int level)
550 {
551         struct dma_pte *parent, *pte = NULL;
552         int total = agaw_to_level(domain->agaw);
553         int offset;
554
555         parent = domain->pgd;
556         while (level <= total) {
557                 offset = address_level_offset(addr, total);
558                 pte = &parent[offset];
559                 if (level == total)
560                         return pte;
561
562                 if (!dma_pte_present(pte))
563                         break;
564                 parent = phys_to_virt(dma_pte_addr(pte));
565                 total--;
566         }
567         return NULL;
568 }
569
570 /* clear one page's page table */
571 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
572 {
573         struct dma_pte *pte = NULL;
574
575         /* get last level pte */
576         pte = dma_addr_level_pte(domain, addr, 1);
577
578         if (pte) {
579                 dma_clear_pte(pte);
580                 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
581         }
582 }
583
584 /* clear last level pte, a tlb flush should be followed */
585 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
586 {
587         int addr_width = agaw_to_width(domain->agaw);
588
589         start &= (((u64)1) << addr_width) - 1;
590         end &= (((u64)1) << addr_width) - 1;
591         /* in case it's a partial page */
592         start = PAGE_ALIGN(start);
593         end &= PAGE_MASK;
594
595         /* we don't need lock here, nobody else touches the iova range */
596         while (start < end) {
597                 dma_pte_clear_one(domain, start);
598                 start += VTD_PAGE_SIZE;
599         }
600 }
601
602 /* free page table pages. last level pte should already be cleared */
603 static void dma_pte_free_pagetable(struct dmar_domain *domain,
604         u64 start, u64 end)
605 {
606         int addr_width = agaw_to_width(domain->agaw);
607         struct dma_pte *pte;
608         int total = agaw_to_level(domain->agaw);
609         int level;
610         u64 tmp;
611
612         start &= (((u64)1) << addr_width) - 1;
613         end &= (((u64)1) << addr_width) - 1;
614
615         /* we don't need lock here, nobody else touches the iova range */
616         level = 2;
617         while (level <= total) {
618                 tmp = align_to_level(start, level);
619                 if (tmp >= end || (tmp + level_size(level) > end))
620                         return;
621
622                 while (tmp < end) {
623                         pte = dma_addr_level_pte(domain, tmp, level);
624                         if (pte) {
625                                 free_pgtable_page(
626                                         phys_to_virt(dma_pte_addr(pte)));
627                                 dma_clear_pte(pte);
628                                 __iommu_flush_cache(domain->iommu,
629                                                 pte, sizeof(*pte));
630                         }
631                         tmp += level_size(level);
632                 }
633                 level++;
634         }
635         /* free pgd */
636         if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
637                 free_pgtable_page(domain->pgd);
638                 domain->pgd = NULL;
639         }
640 }
641
642 /* iommu handling */
643 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
644 {
645         struct root_entry *root;
646         unsigned long flags;
647
648         root = (struct root_entry *)alloc_pgtable_page();
649         if (!root)
650                 return -ENOMEM;
651
652         __iommu_flush_cache(iommu, root, ROOT_SIZE);
653
654         spin_lock_irqsave(&iommu->lock, flags);
655         iommu->root_entry = root;
656         spin_unlock_irqrestore(&iommu->lock, flags);
657
658         return 0;
659 }
660
661 static void iommu_set_root_entry(struct intel_iommu *iommu)
662 {
663         void *addr;
664         u32 cmd, sts;
665         unsigned long flag;
666
667         addr = iommu->root_entry;
668
669         spin_lock_irqsave(&iommu->register_lock, flag);
670         dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
671
672         cmd = iommu->gcmd | DMA_GCMD_SRTP;
673         writel(cmd, iommu->reg + DMAR_GCMD_REG);
674
675         /* Make sure hardware completes it */
676         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
677                 readl, (sts & DMA_GSTS_RTPS), sts);
678
679         spin_unlock_irqrestore(&iommu->register_lock, flag);
680 }
681
682 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
683 {
684         u32 val;
685         unsigned long flag;
686
687         if (!cap_rwbf(iommu->cap))
688                 return;
689         val = iommu->gcmd | DMA_GCMD_WBF;
690
691         spin_lock_irqsave(&iommu->register_lock, flag);
692         writel(val, iommu->reg + DMAR_GCMD_REG);
693
694         /* Make sure hardware completes it */
695         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
696                         readl, (!(val & DMA_GSTS_WBFS)), val);
697
698         spin_unlock_irqrestore(&iommu->register_lock, flag);
699 }
700
701 /* return value determines whether we need a write buffer flush */
702 static int __iommu_flush_context(struct intel_iommu *iommu,
703         u16 did, u16 source_id, u8 function_mask, u64 type,
704         int non_present_entry_flush)
705 {
706         u64 val = 0;
707         unsigned long flag;
708
709         /*
710          * In the non-present entry flush case, if hardware doesn't cache
711          * non-present entry we do nothing and if hardware cache non-present
712          * entry, we flush entries of domain 0 (the domain id is used to cache
713          * any non-present entries)
714          */
715         if (non_present_entry_flush) {
716                 if (!cap_caching_mode(iommu->cap))
717                         return 1;
718                 else
719                         did = 0;
720         }
721
722         switch (type) {
723         case DMA_CCMD_GLOBAL_INVL:
724                 val = DMA_CCMD_GLOBAL_INVL;
725                 break;
726         case DMA_CCMD_DOMAIN_INVL:
727                 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
728                 break;
729         case DMA_CCMD_DEVICE_INVL:
730                 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
731                         | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
732                 break;
733         default:
734                 BUG();
735         }
736         val |= DMA_CCMD_ICC;
737
738         spin_lock_irqsave(&iommu->register_lock, flag);
739         dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
740
741         /* Make sure hardware completes it */
742         IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
743                 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
744
745         spin_unlock_irqrestore(&iommu->register_lock, flag);
746
747         /* flush context entry will implicitly flush write buffer */
748         return 0;
749 }
750
751 /* return value determines whether we need a write buffer flush */
752 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
753         u64 addr, unsigned int size_order, u64 type,
754         int non_present_entry_flush)
755 {
756         int tlb_offset = ecap_iotlb_offset(iommu->ecap);
757         u64 val = 0, val_iva = 0;
758         unsigned long flag;
759
760         /*
761          * In the non-present entry flush case, if hardware doesn't cache
762          * non-present entry we do nothing and if hardware cache non-present
763          * entry, we flush entries of domain 0 (the domain id is used to cache
764          * any non-present entries)
765          */
766         if (non_present_entry_flush) {
767                 if (!cap_caching_mode(iommu->cap))
768                         return 1;
769                 else
770                         did = 0;
771         }
772
773         switch (type) {
774         case DMA_TLB_GLOBAL_FLUSH:
775                 /* global flush doesn't need set IVA_REG */
776                 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
777                 break;
778         case DMA_TLB_DSI_FLUSH:
779                 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
780                 break;
781         case DMA_TLB_PSI_FLUSH:
782                 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
783                 /* Note: always flush non-leaf currently */
784                 val_iva = size_order | addr;
785                 break;
786         default:
787                 BUG();
788         }
789         /* Note: set drain read/write */
790 #if 0
791         /*
792          * This is probably just to be extra safe.  It looks like we can
793          * ignore it without any impact.
794          */
795         if (cap_read_drain(iommu->cap))
796                 val |= DMA_TLB_READ_DRAIN;
797 #endif
798         if (cap_write_drain(iommu->cap))
799                 val |= DMA_TLB_WRITE_DRAIN;
800
801         spin_lock_irqsave(&iommu->register_lock, flag);
802         /* Note: Only uses first TLB reg currently */
803         if (val_iva)
804                 dmar_writeq(iommu->reg + tlb_offset, val_iva);
805         dmar_writeq(iommu->reg + tlb_offset + 8, val);
806
807         /* Make sure hardware completes it */
808         IOMMU_WAIT_OP(iommu, tlb_offset + 8,
809                 dmar_readq, (!(val & DMA_TLB_IVT)), val);
810
811         spin_unlock_irqrestore(&iommu->register_lock, flag);
812
813         /* check IOTLB invalidation granularity */
814         if (DMA_TLB_IAIG(val) == 0)
815                 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
816         if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
817                 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
818                         (unsigned long long)DMA_TLB_IIRG(type),
819                         (unsigned long long)DMA_TLB_IAIG(val));
820         /* flush iotlb entry will implicitly flush write buffer */
821         return 0;
822 }
823
824 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
825         u64 addr, unsigned int pages, int non_present_entry_flush)
826 {
827         unsigned int mask;
828
829         BUG_ON(addr & (~VTD_PAGE_MASK));
830         BUG_ON(pages == 0);
831
832         /* Fallback to domain selective flush if no PSI support */
833         if (!cap_pgsel_inv(iommu->cap))
834                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
835                                                 DMA_TLB_DSI_FLUSH,
836                                                 non_present_entry_flush);
837
838         /*
839          * PSI requires page size to be 2 ^ x, and the base address is naturally
840          * aligned to the size
841          */
842         mask = ilog2(__roundup_pow_of_two(pages));
843         /* Fallback to domain selective flush if size is too big */
844         if (mask > cap_max_amask_val(iommu->cap))
845                 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
846                         DMA_TLB_DSI_FLUSH, non_present_entry_flush);
847
848         return iommu->flush.flush_iotlb(iommu, did, addr, mask,
849                                         DMA_TLB_PSI_FLUSH,
850                                         non_present_entry_flush);
851 }
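/*
 * Mask arithmetic example (hypothetical request): flushing pages = 5 gives
 * mask = ilog2(__roundup_pow_of_two(5)) = ilog2(8) = 3, i.e. a
 * page-selective invalidation covering 2^3 = 8 pages starting at 'addr'
 * (which the hardware expects to be naturally aligned to that size).  If
 * the resulting mask exceeds cap_max_amask_val(), the code falls back to a
 * domain-selective flush instead.
 */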
852
853 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
854 {
855         u32 pmen;
856         unsigned long flags;
857
858         spin_lock_irqsave(&iommu->register_lock, flags);
859         pmen = readl(iommu->reg + DMAR_PMEN_REG);
860         pmen &= ~DMA_PMEN_EPM;
861         writel(pmen, iommu->reg + DMAR_PMEN_REG);
862
863         /* wait for the protected region status bit to clear */
864         IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
865                 readl, !(pmen & DMA_PMEN_PRS), pmen);
866
867         spin_unlock_irqrestore(&iommu->register_lock, flags);
868 }
869
870 static int iommu_enable_translation(struct intel_iommu *iommu)
871 {
872         u32 sts;
873         unsigned long flags;
874
875         spin_lock_irqsave(&iommu->register_lock, flags);
876         writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
877
878         /* Make sure hardware completes it */
879         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
880                 readl, (sts & DMA_GSTS_TES), sts);
881
882         iommu->gcmd |= DMA_GCMD_TE;
883         spin_unlock_irqrestore(&iommu->register_lock, flags);
884         return 0;
885 }
886
887 static int iommu_disable_translation(struct intel_iommu *iommu)
888 {
889         u32 sts;
890         unsigned long flag;
891
892         spin_lock_irqsave(&iommu->register_lock, flag);
893         iommu->gcmd &= ~DMA_GCMD_TE;
894         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
895
896         /* Make sure hardware completes it */
897         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
898                 readl, (!(sts & DMA_GSTS_TES)), sts);
899
900         spin_unlock_irqrestore(&iommu->register_lock, flag);
901         return 0;
902 }
903
904 /* iommu interrupt handling. Most of it is MSI-like. */
905
906 static const char *fault_reason_strings[] =
907 {
908         "Software",
909         "Present bit in root entry is clear",
910         "Present bit in context entry is clear",
911         "Invalid context entry",
912         "Access beyond MGAW",
913         "PTE Write access is not set",
914         "PTE Read access is not set",
915         "Next page table ptr is invalid",
916         "Root table address invalid",
917         "Context table ptr is invalid",
918         "non-zero reserved fields in RTP",
919         "non-zero reserved fields in CTP",
920         "non-zero reserved fields in PTE",
921 };
922 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
923
924 const char *dmar_get_fault_reason(u8 fault_reason)
925 {
926         if (fault_reason > MAX_FAULT_REASON_IDX)
927                 return "Unknown";
928         else
929                 return fault_reason_strings[fault_reason];
930 }
931
932 void dmar_msi_unmask(unsigned int irq)
933 {
934         struct intel_iommu *iommu = get_irq_data(irq);
935         unsigned long flag;
936
937         /* unmask it */
938         spin_lock_irqsave(&iommu->register_lock, flag);
939         writel(0, iommu->reg + DMAR_FECTL_REG);
940         /* Read a reg to force flush the post write */
941         readl(iommu->reg + DMAR_FECTL_REG);
942         spin_unlock_irqrestore(&iommu->register_lock, flag);
943 }
944
945 void dmar_msi_mask(unsigned int irq)
946 {
947         unsigned long flag;
948         struct intel_iommu *iommu = get_irq_data(irq);
949
950         /* mask it */
951         spin_lock_irqsave(&iommu->register_lock, flag);
952         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
953         /* Read a reg to force flush the post write */
954         readl(iommu->reg + DMAR_FECTL_REG);
955         spin_unlock_irqrestore(&iommu->register_lock, flag);
956 }
957
958 void dmar_msi_write(int irq, struct msi_msg *msg)
959 {
960         struct intel_iommu *iommu = get_irq_data(irq);
961         unsigned long flag;
962
963         spin_lock_irqsave(&iommu->register_lock, flag);
964         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
965         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
966         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
967         spin_unlock_irqrestore(&iommu->register_lock, flag);
968 }
969
970 void dmar_msi_read(int irq, struct msi_msg *msg)
971 {
972         struct intel_iommu *iommu = get_irq_data(irq);
973         unsigned long flag;
974
975         spin_lock_irqsave(&iommu->register_lock, flag);
976         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
977         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
978         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
979         spin_unlock_irqrestore(&iommu->register_lock, flag);
980 }
981
982 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
983                 u8 fault_reason, u16 source_id, unsigned long long addr)
984 {
985         const char *reason;
986
987         reason = dmar_get_fault_reason(fault_reason);
988
989         printk(KERN_ERR
990                 "DMAR:[%s] Request device [%02x:%02x.%d] "
991                 "fault addr %llx \n"
992                 "DMAR:[fault reason %02d] %s\n",
993                 (type ? "DMA Read" : "DMA Write"),
994                 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
995                 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
996         return 0;
997 }
998
999 #define PRIMARY_FAULT_REG_LEN (16)
1000 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1001 {
1002         struct intel_iommu *iommu = dev_id;
1003         int reg, fault_index;
1004         u32 fault_status;
1005         unsigned long flag;
1006
1007         spin_lock_irqsave(&iommu->register_lock, flag);
1008         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1009
1010         /* TBD: ignore advanced fault log currently */
1011         if (!(fault_status & DMA_FSTS_PPF))
1012                 goto clear_overflow;
1013
1014         fault_index = dma_fsts_fault_record_index(fault_status);
1015         reg = cap_fault_reg_offset(iommu->cap);
1016         while (1) {
1017                 u8 fault_reason;
1018                 u16 source_id;
1019                 u64 guest_addr;
1020                 int type;
1021                 u32 data;
1022
1023                 /* highest 32 bits */
1024                 data = readl(iommu->reg + reg +
1025                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1026                 if (!(data & DMA_FRCD_F))
1027                         break;
1028
1029                 fault_reason = dma_frcd_fault_reason(data);
1030                 type = dma_frcd_type(data);
1031
1032                 data = readl(iommu->reg + reg +
1033                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1034                 source_id = dma_frcd_source_id(data);
1035
1036                 guest_addr = dmar_readq(iommu->reg + reg +
1037                                 fault_index * PRIMARY_FAULT_REG_LEN);
1038                 guest_addr = dma_frcd_page_addr(guest_addr);
1039                 /* clear the fault */
1040                 writel(DMA_FRCD_F, iommu->reg + reg +
1041                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1042
1043                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1044
1045                 iommu_page_fault_do_one(iommu, type, fault_reason,
1046                                 source_id, guest_addr);
1047
1048                 fault_index++;
1049                 if (fault_index > cap_num_fault_regs(iommu->cap))
1050                         fault_index = 0;
1051                 spin_lock_irqsave(&iommu->register_lock, flag);
1052         }
1053 clear_overflow:
1054         /* clear primary fault overflow */
1055         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1056         if (fault_status & DMA_FSTS_PFO)
1057                 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1058
1059         spin_unlock_irqrestore(&iommu->register_lock, flag);
1060         return IRQ_HANDLED;
1061 }
1062
1063 int dmar_set_interrupt(struct intel_iommu *iommu)
1064 {
1065         int irq, ret;
1066
1067         irq = create_irq();
1068         if (!irq) {
1069                 printk(KERN_ERR "IOMMU: no free vectors\n");
1070                 return -EINVAL;
1071         }
1072
1073         set_irq_data(irq, iommu);
1074         iommu->irq = irq;
1075
1076         ret = arch_setup_dmar_msi(irq);
1077         if (ret) {
1078                 set_irq_data(irq, NULL);
1079                 iommu->irq = 0;
1080                 destroy_irq(irq);
1081                 return ret;
1082         }
1083
1084         /* Force any pending faults to be cleared */
1085         iommu_page_fault(irq, iommu);
1086
1087         ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1088         if (ret)
1089                 printk(KERN_ERR "IOMMU: can't request irq\n");
1090         return ret;
1091 }
1092
1093 static int iommu_init_domains(struct intel_iommu *iommu)
1094 {
1095         unsigned long ndomains;
1096         unsigned long nlongs;
1097
1098         ndomains = cap_ndoms(iommu->cap);
1099         pr_debug("Number of Domains supported <%ld>\n", ndomains);
1100         nlongs = BITS_TO_LONGS(ndomains);
1101
1102         /* TBD: there might be 64K domains,
1103          * consider other allocation for future chip
1104          */
1105         iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1106         if (!iommu->domain_ids) {
1107                 printk(KERN_ERR "Allocating domain id array failed\n");
1108                 return -ENOMEM;
1109         }
1110         iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1111                         GFP_KERNEL);
1112         if (!iommu->domains) {
1113                 printk(KERN_ERR "Allocating domain array failed\n");
1114                 kfree(iommu->domain_ids);
1115                 return -ENOMEM;
1116         }
1117
1118         spin_lock_init(&iommu->lock);
1119
1120         /*
1121          * if Caching mode is set, then invalid translations are tagged
1122          * with domainid 0. Hence we need to pre-allocate it.
1123          */
1124         if (cap_caching_mode(iommu->cap))
1125                 set_bit(0, iommu->domain_ids);
1126         return 0;
1127 }
1128
1129
1130 static void domain_exit(struct dmar_domain *domain);
1131
1132 void free_dmar_iommu(struct intel_iommu *iommu)
1133 {
1134         struct dmar_domain *domain;
1135         int i;
1136
1137         i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1138         for (; i < cap_ndoms(iommu->cap); ) {
1139                 domain = iommu->domains[i];
1140                 clear_bit(i, iommu->domain_ids);
1141                 domain_exit(domain);
1142                 i = find_next_bit(iommu->domain_ids,
1143                         cap_ndoms(iommu->cap), i+1);
1144         }
1145
1146         if (iommu->gcmd & DMA_GCMD_TE)
1147                 iommu_disable_translation(iommu);
1148
1149         if (iommu->irq) {
1150                 set_irq_data(iommu->irq, NULL);
1151                 /* This will mask the irq */
1152                 free_irq(iommu->irq, iommu);
1153                 destroy_irq(iommu->irq);
1154         }
1155
1156         kfree(iommu->domains);
1157         kfree(iommu->domain_ids);
1158
1159         g_iommus[iommu->seq_id] = NULL;
1160
1161         /* if all iommus are freed, free g_iommus */
1162         for (i = 0; i < g_num_of_iommus; i++) {
1163                 if (g_iommus[i])
1164                         break;
1165         }
1166
1167         if (i == g_num_of_iommus)
1168                 kfree(g_iommus);
1169
1170         /* free context mapping */
1171         free_context_table(iommu);
1172 }
1173
1174 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1175 {
1176         unsigned long num;
1177         unsigned long ndomains;
1178         struct dmar_domain *domain;
1179         unsigned long flags;
1180
1181         domain = alloc_domain_mem();
1182         if (!domain)
1183                 return NULL;
1184
1185         ndomains = cap_ndoms(iommu->cap);
1186
1187         spin_lock_irqsave(&iommu->lock, flags);
1188         num = find_first_zero_bit(iommu->domain_ids, ndomains);
1189         if (num >= ndomains) {
1190                 spin_unlock_irqrestore(&iommu->lock, flags);
1191                 free_domain_mem(domain);
1192                 printk(KERN_ERR "IOMMU: no free domain ids\n");
1193                 return NULL;
1194         }
1195
1196         set_bit(num, iommu->domain_ids);
1197         domain->id = num;
1198         domain->iommu = iommu;
1199         domain->flags = 0;
1200         iommu->domains[num] = domain;
1201         spin_unlock_irqrestore(&iommu->lock, flags);
1202
1203         return domain;
1204 }
1205
1206 static void iommu_free_domain(struct dmar_domain *domain)
1207 {
1208         unsigned long flags;
1209
1210         spin_lock_irqsave(&domain->iommu->lock, flags);
1211         clear_bit(domain->id, domain->iommu->domain_ids);
1212         spin_unlock_irqrestore(&domain->iommu->lock, flags);
1213 }
1214
1215 static struct iova_domain reserved_iova_list;
1216 static struct lock_class_key reserved_alloc_key;
1217 static struct lock_class_key reserved_rbtree_key;
1218
1219 static void dmar_init_reserved_ranges(void)
1220 {
1221         struct pci_dev *pdev = NULL;
1222         struct iova *iova;
1223         int i;
1224         u64 addr, size;
1225
1226         init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1227
1228         lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1229                 &reserved_alloc_key);
1230         lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1231                 &reserved_rbtree_key);
1232
1233         /* IOAPIC ranges shouldn't be accessed by DMA */
1234         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1235                 IOVA_PFN(IOAPIC_RANGE_END));
1236         if (!iova)
1237                 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1238
1239         /* Reserve all PCI MMIO to avoid peer-to-peer access */
1240         for_each_pci_dev(pdev) {
1241                 struct resource *r;
1242
1243                 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1244                         r = &pdev->resource[i];
1245                         if (!r->flags || !(r->flags & IORESOURCE_MEM))
1246                                 continue;
1247                         addr = r->start;
1248                         addr &= PAGE_MASK;
1249                         size = r->end - addr;
1250                         size = PAGE_ALIGN(size);
1251                         iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1252                                 IOVA_PFN(size + addr) - 1);
1253                         if (!iova)
1254                                 printk(KERN_ERR "Reserve iova failed\n");
1255                 }
1256         }
1257
1258 }
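/*
 * The reservations above are copied into every new domain by
 * domain_reserve_special_ranges(), so the IOVA allocator can never hand out
 * addresses overlapping the IOAPIC/interrupt window (0xfee00000-0xfeefffff)
 * or an existing PCI MMIO BAR, which would otherwise be decoded as
 * peer-to-peer traffic instead of reaching memory.
 */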
1259
1260 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1261 {
1262         copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1263 }
1264
1265 static inline int guestwidth_to_adjustwidth(int gaw)
1266 {
1267         int agaw;
1268         int r = (gaw - 12) % 9;
1269
1270         if (r == 0)
1271                 agaw = gaw;
1272         else
1273                 agaw = gaw + 9 - r;
1274         if (agaw > 64)
1275                 agaw = 64;
1276         return agaw;
1277 }
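/*
 * Examples: a guest width of 48 is already on a 9-bit level boundary
 * ((48 - 12) % 9 == 0) and is returned unchanged; a guest width of 32 gives
 * r = (32 - 12) % 9 = 2 and is rounded up to 39.  domain_init() then turns
 * the adjusted width into an agaw (width_to_agaw(39) = 1, i.e. a 3-level
 * table), which must also be advertised in the hardware's SAGAW field.
 */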
1278
1279 static int domain_init(struct dmar_domain *domain, int guest_width)
1280 {
1281         struct intel_iommu *iommu;
1282         int adjust_width, agaw;
1283         unsigned long sagaw;
1284
1285         init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1286         spin_lock_init(&domain->mapping_lock);
1287
1288         domain_reserve_special_ranges(domain);
1289
1290         /* calculate AGAW */
1291         iommu = domain->iommu;
1292         if (guest_width > cap_mgaw(iommu->cap))
1293                 guest_width = cap_mgaw(iommu->cap);
1294         domain->gaw = guest_width;
1295         adjust_width = guestwidth_to_adjustwidth(guest_width);
1296         agaw = width_to_agaw(adjust_width);
1297         sagaw = cap_sagaw(iommu->cap);
1298         if (!test_bit(agaw, &sagaw)) {
1299                 /* hardware doesn't support it, choose a bigger one */
1300                 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1301                 agaw = find_next_bit(&sagaw, 5, agaw);
1302                 if (agaw >= 5)
1303                         return -ENODEV;
1304         }
1305         domain->agaw = agaw;
1306         INIT_LIST_HEAD(&domain->devices);
1307
1308         /* always allocate the top pgd */
1309         domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1310         if (!domain->pgd)
1311                 return -ENOMEM;
1312         __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1313         return 0;
1314 }
1315
1316 static void domain_exit(struct dmar_domain *domain)
1317 {
1318         u64 end;
1319
1320         /* Domain 0 is reserved, so don't process it */
1321         if (!domain)
1322                 return;
1323
1324         domain_remove_dev_info(domain);
1325         /* destroy iovas */
1326         put_iova_domain(&domain->iovad);
1327         end = DOMAIN_MAX_ADDR(domain->gaw);
1328         end = end & (~PAGE_MASK);
1329
1330         /* clear ptes */
1331         dma_pte_clear_range(domain, 0, end);
1332
1333         /* free page tables */
1334         dma_pte_free_pagetable(domain, 0, end);
1335
1336         iommu_free_domain(domain);
1337         free_domain_mem(domain);
1338 }
1339
1340 static int domain_context_mapping_one(struct dmar_domain *domain,
1341                 u8 bus, u8 devfn)
1342 {
1343         struct context_entry *context;
1344         struct intel_iommu *iommu = domain->iommu;
1345         unsigned long flags;
1346
1347         pr_debug("Set context mapping for %02x:%02x.%d\n",
1348                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1349         BUG_ON(!domain->pgd);
1350         context = device_to_context_entry(iommu, bus, devfn);
1351         if (!context)
1352                 return -ENOMEM;
1353         spin_lock_irqsave(&iommu->lock, flags);
1354         if (context_present(context)) {
1355                 spin_unlock_irqrestore(&iommu->lock, flags);
1356                 return 0;
1357         }
1358
1359         context_set_domain_id(context, domain->id);
1360         context_set_address_width(context, domain->agaw);
1361         context_set_address_root(context, virt_to_phys(domain->pgd));
1362         context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1363         context_set_fault_enable(context);
1364         context_set_present(context);
1365         __iommu_flush_cache(iommu, context, sizeof(*context));
1366
1367         /* it's a non-present to present mapping */
1368         if (iommu->flush.flush_context(iommu, domain->id,
1369                 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1370                 DMA_CCMD_DEVICE_INVL, 1))
1371                 iommu_flush_write_buffer(iommu);
1372         else
1373                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1374
1375         spin_unlock_irqrestore(&iommu->lock, flags);
1376         return 0;
1377 }
1378
1379 static int
1380 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1381 {
1382         int ret;
1383         struct pci_dev *tmp, *parent;
1384
1385         ret = domain_context_mapping_one(domain, pdev->bus->number,
1386                 pdev->devfn);
1387         if (ret)
1388                 return ret;
1389
1390         /* dependent device mapping */
1391         tmp = pci_find_upstream_pcie_bridge(pdev);
1392         if (!tmp)
1393                 return 0;
1394         /* Secondary interface's bus number and devfn 0 */
1395         parent = pdev->bus->self;
1396         while (parent != tmp) {
1397                 ret = domain_context_mapping_one(domain, parent->bus->number,
1398                         parent->devfn);
1399                 if (ret)
1400                         return ret;
1401                 parent = parent->bus->self;
1402         }
1403         if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1404                 return domain_context_mapping_one(domain,
1405                         tmp->subordinate->number, 0);
1406         else /* this is a legacy PCI bridge */
1407                 return domain_context_mapping_one(domain,
1408                         tmp->bus->number, tmp->devfn);
1409 }
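/*
 * In other words, a device that sits behind a PCIe-to-PCI bridge gets
 * context entries installed for itself, for every bridge on the path up to
 * the upstream PCIe bridge, and for that bridge itself (its secondary bus
 * with devfn 0, or its own bus/devfn for a legacy bridge), presumably
 * because transactions forwarded by a conventional PCI bridge can carry the
 * bridge's requester ID rather than the device's.  This is also why such
 * devices end up sharing one domain (DOMAIN_FLAG_P2P_MULTIPLE_DEVICES).
 */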
1410
1411 static int domain_context_mapped(struct dmar_domain *domain,
1412         struct pci_dev *pdev)
1413 {
1414         int ret;
1415         struct pci_dev *tmp, *parent;
1416
1417         ret = device_context_mapped(domain->iommu,
1418                 pdev->bus->number, pdev->devfn);
1419         if (!ret)
1420                 return ret;
1421         /* dependent device mapping */
1422         tmp = pci_find_upstream_pcie_bridge(pdev);
1423         if (!tmp)
1424                 return ret;
1425         /* Secondary interface's bus number and devfn 0 */
1426         parent = pdev->bus->self;
1427         while (parent != tmp) {
1428                 ret = device_context_mapped(domain->iommu, parent->bus->number,
1429                         parent->devfn);
1430                 if (!ret)
1431                         return ret;
1432                 parent = parent->bus->self;
1433         }
1434         if (tmp->is_pcie)
1435                 return device_context_mapped(domain->iommu,
1436                         tmp->subordinate->number, 0);
1437         else
1438                 return device_context_mapped(domain->iommu,
1439                         tmp->bus->number, tmp->devfn);
1440 }
1441
1442 static int
1443 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1444                         u64 hpa, size_t size, int prot)
1445 {
1446         u64 start_pfn, end_pfn;
1447         struct dma_pte *pte;
1448         int index;
1449         int addr_width = agaw_to_width(domain->agaw);
1450
1451         hpa &= (((u64)1) << addr_width) - 1;
1452
1453         if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1454                 return -EINVAL;
1455         iova &= PAGE_MASK;
1456         start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1457         end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1458         index = 0;
1459         while (start_pfn < end_pfn) {
1460                 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1461                 if (!pte)
1462                         return -ENOMEM;
1463                 /* We don't need lock here, nobody else
1464                  * touches the iova range
1465                  */
1466                 BUG_ON(dma_pte_addr(pte));
1467                 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1468                 dma_set_pte_prot(pte, prot);
1469                 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1470                 start_pfn++;
1471                 index++;
1472         }
1473         return 0;
1474 }
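/*
 * Usage sketch (hypothetical values): mapping 8K of host memory at physical
 * address 0x1000 to IOVA 0x2000 with read/write access would be
 *
 *	domain_page_mapping(domain, 0x2000, 0x1000, 2 * VTD_PAGE_SIZE,
 *			    DMA_PTE_READ | DMA_PTE_WRITE);
 *
 * which installs two leaf PTEs; the caller is responsible for having
 * allocated or reserved the IOVA range and for any IOTLB flush afterwards.
 */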
1475
1476 static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1477 {
1478         clear_context_table(domain->iommu, bus, devfn);
1479         domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1480                                            DMA_CCMD_GLOBAL_INVL, 0);
1481         domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1482                                          DMA_TLB_GLOBAL_FLUSH, 0);
1483 }
1484
1485 static void domain_remove_dev_info(struct dmar_domain *domain)
1486 {
1487         struct device_domain_info *info;
1488         unsigned long flags;
1489
1490         spin_lock_irqsave(&device_domain_lock, flags);
1491         while (!list_empty(&domain->devices)) {
1492                 info = list_entry(domain->devices.next,
1493                         struct device_domain_info, link);
1494                 list_del(&info->link);
1495                 list_del(&info->global);
1496                 if (info->dev)
1497                         info->dev->dev.archdata.iommu = NULL;
1498                 spin_unlock_irqrestore(&device_domain_lock, flags);
1499
1500                 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1501                 free_devinfo_mem(info);
1502
1503                 spin_lock_irqsave(&device_domain_lock, flags);
1504         }
1505         spin_unlock_irqrestore(&device_domain_lock, flags);
1506 }
1507
1508 /*
1509  * find_domain
1510  * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1511  */
1512 static struct dmar_domain *
1513 find_domain(struct pci_dev *pdev)
1514 {
1515         struct device_domain_info *info;
1516
1517         /* No lock here, assumes no domain exit in normal case */
1518         info = pdev->dev.archdata.iommu;
1519         if (info)
1520                 return info->domain;
1521         return NULL;
1522 }
1523
1524 /* domain is initialized */
1525 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1526 {
1527         struct dmar_domain *domain, *found = NULL;
1528         struct intel_iommu *iommu;
1529         struct dmar_drhd_unit *drhd;
1530         struct device_domain_info *info, *tmp;
1531         struct pci_dev *dev_tmp;
1532         unsigned long flags;
1533         int bus = 0, devfn = 0;
1534
1535         domain = find_domain(pdev);
1536         if (domain)
1537                 return domain;
1538
1539         dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1540         if (dev_tmp) {
1541                 if (dev_tmp->is_pcie) {
1542                         bus = dev_tmp->subordinate->number;
1543                         devfn = 0;
1544                 } else {
1545                         bus = dev_tmp->bus->number;
1546                         devfn = dev_tmp->devfn;
1547                 }
1548                 spin_lock_irqsave(&device_domain_lock, flags);
1549                 list_for_each_entry(info, &device_domain_list, global) {
1550                         if (info->bus == bus && info->devfn == devfn) {
1551                                 found = info->domain;
1552                                 break;
1553                         }
1554                 }
1555                 spin_unlock_irqrestore(&device_domain_lock, flags);
1556                 /* pcie-pci bridge already has a domain, use it */
1557                 if (found) {
1558                         domain = found;
1559                         goto found_domain;
1560                 }
1561         }
1562
1563         /* Allocate new domain for the device */
1564         drhd = dmar_find_matched_drhd_unit(pdev);
1565         if (!drhd) {
1566                 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1567                         pci_name(pdev));
1568                 return NULL;
1569         }
1570         iommu = drhd->iommu;
1571
1572         domain = iommu_alloc_domain(iommu);
1573         if (!domain)
1574                 goto error;
1575
1576         if (domain_init(domain, gaw)) {
1577                 domain_exit(domain);
1578                 goto error;
1579         }
1580
1581         /* register pcie-to-pci device */
1582         if (dev_tmp) {
1583                 info = alloc_devinfo_mem();
1584                 if (!info) {
1585                         domain_exit(domain);
1586                         goto error;
1587                 }
1588                 info->bus = bus;
1589                 info->devfn = devfn;
1590                 info->dev = NULL;
1591                 info->domain = domain;
1592                 /* This domain is shared by devices under p2p bridge */
1593                 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1594
1595                 /* recheck under the lock: if the bridge got a domain meanwhile, use it */
1596                 found = NULL;
1597                 spin_lock_irqsave(&device_domain_lock, flags);
1598                 list_for_each_entry(tmp, &device_domain_list, global) {
1599                         if (tmp->bus == bus && tmp->devfn == devfn) {
1600                                 found = tmp->domain;
1601                                 break;
1602                         }
1603                 }
1604                 if (found) {
1605                         free_devinfo_mem(info);
1606                         domain_exit(domain);
1607                         domain = found;
1608                 } else {
1609                         list_add(&info->link, &domain->devices);
1610                         list_add(&info->global, &device_domain_list);
1611                 }
1612                 spin_unlock_irqrestore(&device_domain_lock, flags);
1613         }
1614
1615 found_domain:
1616         info = alloc_devinfo_mem();
1617         if (!info)
1618                 goto error;
1619         info->bus = pdev->bus->number;
1620         info->devfn = pdev->devfn;
1621         info->dev = pdev;
1622         info->domain = domain;
1623         spin_lock_irqsave(&device_domain_lock, flags);
1624         /* somebody was faster: the device may already have a domain by now */
1625         found = find_domain(pdev);
1626         if (found != NULL) {
1627                 spin_unlock_irqrestore(&device_domain_lock, flags);
1628                 if (found != domain) {
1629                         domain_exit(domain);
1630                         domain = found;
1631                 }
1632                 free_devinfo_mem(info);
1633                 return domain;
1634         }
1635         list_add(&info->link, &domain->devices);
1636         list_add(&info->global, &device_domain_list);
1637         pdev->dev.archdata.iommu = info;
1638         spin_unlock_irqrestore(&device_domain_lock, flags);
1639         return domain;
1640 error:
1641         /* recheck: another thread may have set up the domain in the meantime */
1642         return find_domain(pdev);
1643 }
1644
1645 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1646                                       unsigned long long start,
1647                                       unsigned long long end)
1648 {
1649         struct dmar_domain *domain;
1650         unsigned long size;
1651         unsigned long long base;
1652         int ret;
1653
1654         printk(KERN_INFO
1655                 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1656                 pci_name(pdev), start, end);
1657         /* page table init */
1658         domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1659         if (!domain)
1660                 return -ENOMEM;
1661
1662         /* The address might not be aligned */
1663         base = start & PAGE_MASK;
1664         size = end - base;
1665         size = PAGE_ALIGN(size);
1666         if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1667                         IOVA_PFN(base + size) - 1)) {
1668                 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1669                 ret = -ENOMEM;
1670                 goto error;
1671         }
1672
1673         pr_debug("Mapping reserved region %lx@%llx for %s\n",
1674                 size, base, pci_name(pdev));
1675         /*
1676          * RMRR range might have overlap with physical memory range,
1677          * clear it first
1678          */
1679         dma_pte_clear_range(domain, base, base + size);
1680
1681         ret = domain_page_mapping(domain, base, base, size,
1682                 DMA_PTE_READ|DMA_PTE_WRITE);
1683         if (ret)
1684                 goto error;
1685
1686         /* context entry init */
1687         ret = domain_context_mapping(domain, pdev);
1688         if (!ret)
1689                 return 0;
1690 error:
1691         domain_exit(domain);
1692         return ret;
1693
1694 }
1695
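/*
 * RMRRs (Reserved Memory Region Reporting structures in the ACPI DMAR
 * table) describe memory the firmware has already set devices up to DMA
 * into at boot, e.g. USB legacy keyboard emulation buffers.  Such ranges
 * must be identity-mapped before translation is enabled, otherwise those
 * devices fault as soon as the IOMMU starts translating.
 */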
1696 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1697         struct pci_dev *pdev)
1698 {
1699         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1700                 return 0;
1701         return iommu_prepare_identity_map(pdev, rmrr->base_address,
1702                 rmrr->end_address + 1);
1703 }
1704
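/*
 * CONFIG_DMAR_GFX_WA: graphics drivers of this era often program devices
 * with raw physical addresses instead of going through the DMA API, so as
 * a workaround each graphics device gets a 1:1 mapping of all usable RAM,
 * built below from the active memory regions of every online node.
 */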
1705 #ifdef CONFIG_DMAR_GFX_WA
1706 struct iommu_prepare_data {
1707         struct pci_dev *pdev;
1708         int ret;
1709 };
1710
1711 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1712                                          unsigned long end_pfn, void *datax)
1713 {
1714         struct iommu_prepare_data *data;
1715
1716         data = (struct iommu_prepare_data *)datax;
1717
1718         data->ret = iommu_prepare_identity_map(data->pdev,
1719                                 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1720         return data->ret;
1721
1722 }
1723
1724 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1725 {
1726         int nid;
1727         struct iommu_prepare_data data;
1728
1729         data.pdev = pdev;
1730         data.ret = 0;
1731
1732         for_each_online_node(nid) {
1733                 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1734                 if (data.ret)
1735                         return data.ret;
1736         }
1737         return data.ret;
1738 }
1739
1740 static void __init iommu_prepare_gfx_mapping(void)
1741 {
1742         struct pci_dev *pdev = NULL;
1743         int ret;
1744
1745         for_each_pci_dev(pdev) {
1746                 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1747                                 !IS_GFX_DEVICE(pdev))
1748                         continue;
1749                 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1750                         pci_name(pdev));
1751                 ret = iommu_prepare_with_active_regions(pdev);
1752                 if (ret)
1753                         printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1754         }
1755 }
1756 #else /* !CONFIG_DMAR_GFX_WA */
1757 static inline void iommu_prepare_gfx_mapping(void)
1758 {
1759         return;
1760 }
1761 #endif
1762
1763 #ifdef CONFIG_DMAR_FLOPPY_WA
1764 static inline void iommu_prepare_isa(void)
1765 {
1766         struct pci_dev *pdev;
1767         int ret;
1768
1769         pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1770         if (!pdev)
1771                 return;
1772
1773         printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1774         ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1775
1776         if (ret)
1777                 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map; "
1778                         "floppy might not work\n");
1779
1780 }
1781 #else
1782 static inline void iommu_prepare_isa(void)
1783 {
1784         return;
1785 }
1786 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1787
1788 static int __init init_dmars(void)
1789 {
1790         struct dmar_drhd_unit *drhd;
1791         struct dmar_rmrr_unit *rmrr;
1792         struct pci_dev *pdev;
1793         struct intel_iommu *iommu;
1794         int i, ret, unit = 0;
1795
1796         /*
1797          * for each drhd
1798          *    allocate root
1799          *    initialize and program root entry to not present
1800          * endfor
1801          */
1802         for_each_drhd_unit(drhd) {
1803                 g_num_of_iommus++;
1804                 /*
1805                  * lock not needed: this is only incremented in the
1806                  * single-threaded kernel __init code path; all other
1807                  * accesses are read-only
1808                  */
1809         }
1810
1811         g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1812                         GFP_KERNEL);
1813         if (!g_iommus) {
1814                 printk(KERN_ERR "Allocating global iommu array failed\n");
1815                 ret = -ENOMEM;
1816                 goto error;
1817         }
1818
1819         deferred_flush = kzalloc(g_num_of_iommus *
1820                 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1821         if (!deferred_flush) {
1822                 kfree(g_iommus);
1823                 ret = -ENOMEM;
1824                 goto error;
1825         }
1826
1827         for_each_drhd_unit(drhd) {
1828                 if (drhd->ignored)
1829                         continue;
1830
1831                 iommu = drhd->iommu;
1832                 g_iommus[iommu->seq_id] = iommu;
1833
1834                 ret = iommu_init_domains(iommu);
1835                 if (ret)
1836                         goto error;
1837
1838                 /*
1839                  * TBD:
1840                  * we could share the same root & context tables
1841                  * among all IOMMUs; need to split it later.
1842                  */
1843                 ret = iommu_alloc_root_entry(iommu);
1844                 if (ret) {
1845                         printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1846                         goto error;
1847                 }
1848         }
1849
1850         for_each_drhd_unit(drhd) {
1851                 if (drhd->ignored)
1852                         continue;
1853
1854                 iommu = drhd->iommu;
1855                 if (dmar_enable_qi(iommu)) {
1856                         /*
1857                          * Queued Invalidate not enabled, use Register Based
1858                          * Invalidate
1859                          */
1860                         iommu->flush.flush_context = __iommu_flush_context;
1861                         iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1862                         printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1863                                "invalidation\n",
1864                                (unsigned long long)drhd->reg_base_addr);
1865                 } else {
1866                         iommu->flush.flush_context = qi_flush_context;
1867                         iommu->flush.flush_iotlb = qi_flush_iotlb;
1868                         printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1869                                "invalidation\n",
1870                                (unsigned long long)drhd->reg_base_addr);
1871                 }
1872         }
1873
1874         /*
1875          * For each rmrr
1876          *   for each dev attached to rmrr
1877          *   do
1878          *     locate drhd for dev, alloc domain for dev
1879          *     allocate free domain
1880          *     allocate page table entries for rmrr
1881          *     if context not allocated for bus
1882          *           allocate and init context
1883          *           set present in root table for this bus
1884          *     init context with domain, translation etc
1885          *    endfor
1886          * endfor
1887          */
1888         for_each_rmrr_units(rmrr) {
1889                 for (i = 0; i < rmrr->devices_cnt; i++) {
1890                         pdev = rmrr->devices[i];
1891                         /* some BIOSes list non-existent devices in the DMAR table */
1892                         if (!pdev)
1893                                 continue;
1894                         ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1895                         if (ret)
1896                                 printk(KERN_ERR
1897                                  "IOMMU: mapping reserved region failed\n");
1898                 }
1899         }
1900
1901         iommu_prepare_gfx_mapping();
1902
1903         iommu_prepare_isa();
1904
1905         /*
1906          * for each drhd
1907          *   enable fault log
1908          *   global invalidate context cache
1909          *   global invalidate iotlb
1910          *   enable translation
1911          */
1912         for_each_drhd_unit(drhd) {
1913                 if (drhd->ignored)
1914                         continue;
1915                 iommu = drhd->iommu;
1916                 sprintf(iommu->name, "dmar%d", unit++);
1917
1918                 iommu_flush_write_buffer(iommu);
1919
1920                 ret = dmar_set_interrupt(iommu);
1921                 if (ret)
1922                         goto error;
1923
1924                 iommu_set_root_entry(iommu);
1925
1926                 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1927                                            0);
1928                 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1929                                          0);
1930                 iommu_disable_protect_mem_regions(iommu);
1931
1932                 ret = iommu_enable_translation(iommu);
1933                 if (ret)
1934                         goto error;
1935         }
1936
1937         return 0;
1938 error:
1939         for_each_drhd_unit(drhd) {
1940                 if (drhd->ignored)
1941                         continue;
1942                 iommu = drhd->iommu;
1943                 free_iommu(iommu);
1944         }
1945         kfree(g_iommus);
1946         return ret;
1947 }
1948
1949 static inline u64 aligned_size(u64 host_addr, size_t size)
1950 {
1951         u64 addr;
1952         addr = (host_addr & (~PAGE_MASK)) + size;
1953         return PAGE_ALIGN(addr);
1954 }
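
/*
 * Worked example (4K pages): aligned_size(0x1003, 0x10) is
 * PAGE_ALIGN(0x3 + 0x10) = 0x1000, i.e. a 16-byte buffer starting 3 bytes
 * into a page still costs one full page of IOVA space; a buffer that
 * straddles a page boundary, e.g. aligned_size(0xff8, 0x10), rounds up to
 * 0x2000 (two pages).
 */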
1955
1956 struct iova *
1957 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1958 {
1959         struct iova *piova;
1960
1961         /* Make sure it's in range */
1962         end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1963         if (!size || (IOVA_START_ADDR + size > end))
1964                 return NULL;
1965
1966         piova = alloc_iova(&domain->iovad,
1967                         size >> PAGE_SHIFT, IOVA_PFN(end), 1);
1968         return piova;
1969 }
1970
1971 static struct iova *
1972 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1973                    size_t size, u64 dma_mask)
1974 {
1975         struct pci_dev *pdev = to_pci_dev(dev);
1976         struct iova *iova = NULL;
1977
1978         if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1979                 iova = iommu_alloc_iova(domain, size, dma_mask);
1980         else {
1981                 /*
1982                  * First try to allocate an io virtual address in
1983                  * DMA_32BIT_MASK and if that fails then try allocating
1984                  * from higher range
1985                  */
1986                 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1987                 if (!iova)
1988                         iova = iommu_alloc_iova(domain, size, dma_mask);
1989         }
1990
1991         if (!iova) {
1992                 printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
1993                 return NULL;
1994         }
1995
1996         return iova;
1997 }
1998
1999 static struct dmar_domain *
2000 get_valid_domain_for_dev(struct pci_dev *pdev)
2001 {
2002         struct dmar_domain *domain;
2003         int ret;
2004
2005         domain = get_domain_for_dev(pdev,
2006                         DEFAULT_DOMAIN_ADDRESS_WIDTH);
2007         if (!domain) {
2008                 printk(KERN_ERR
2009                         "Allocating domain for %s failed\n", pci_name(pdev));
2010                 return NULL;
2011         }
2012
2013         /* make sure context mapping is ok */
2014         if (unlikely(!domain_context_mapped(domain, pdev))) {
2015                 ret = domain_context_mapping(domain, pdev);
2016                 if (ret) {
2017                         printk(KERN_ERR
2018                                 "Domain context map for %s failed\n",
2019                                 pci_name(pdev));
2020                         return NULL;
2021                 }
2022         }
2023
2024         return domain;
2025 }
2026
2027 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2028                                      size_t size, int dir, u64 dma_mask)
2029 {
2030         struct pci_dev *pdev = to_pci_dev(hwdev);
2031         struct dmar_domain *domain;
2032         phys_addr_t start_paddr;
2033         struct iova *iova;
2034         int prot = 0;
2035         int ret;
2036
2037         BUG_ON(dir == DMA_NONE);
2038         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2039                 return paddr;
2040
2041         domain = get_valid_domain_for_dev(pdev);
2042         if (!domain)
2043                 return 0;
2044
2045         size = aligned_size((u64)paddr, size);
2046
2047         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2048         if (!iova)
2049                 goto error;
2050
2051         start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2052
2053         /*
2054          * Check whether this IOMMU supports zero-length reads on
2055          * write-only mappings.
2056          */
2057         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2058                         !cap_zlr(domain->iommu->cap))
2059                 prot |= DMA_PTE_READ;
2060         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2061                 prot |= DMA_PTE_WRITE;
2062         /*
2063          * The range paddr .. paddr + size may cover partial pages, so map
2064          * whole pages.  Note: if two parts of one page are mapped
2065          * separately, two guest addresses may map to the same host paddr,
2066          * but this is not a big problem
2067          */
2068         ret = domain_page_mapping(domain, start_paddr,
2069                 ((u64)paddr) & PAGE_MASK, size, prot);
2070         if (ret)
2071                 goto error;
2072
2073         /* it's a non-present to present mapping */
2074         ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
2075                         start_paddr, size >> VTD_PAGE_SHIFT, 1);
2076         if (ret)
2077                 iommu_flush_write_buffer(domain->iommu);
2078
2079         return start_paddr + ((u64)paddr & (~PAGE_MASK));
2080
2081 error:
2082         if (iova)
2083                 __free_iova(&domain->iovad, iova);
2084         printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
2085                 pci_name(pdev), size, (unsigned long long)paddr, dir);
2086         return 0;
2087 }
2088
2089 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2090                             size_t size, int dir)
2091 {
2092         return __intel_map_single(hwdev, paddr, size, dir,
2093                                   to_pci_dev(hwdev)->dma_mask);
2094 }
2095
2096 static void flush_unmaps(void)
2097 {
2098         int i, j;
2099
2100         timer_on = 0;
2101
2102         /* just flush them all */
2103         for (i = 0; i < g_num_of_iommus; i++) {
2104                 struct intel_iommu *iommu = g_iommus[i];
2105                 if (!iommu)
2106                         continue;
2107
2108                 if (deferred_flush[i].next) {
2109                         iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2110                                                  DMA_TLB_GLOBAL_FLUSH, 0);
2111                         for (j = 0; j < deferred_flush[i].next; j++) {
2112                                 __free_iova(&deferred_flush[i].domain[j]->iovad,
2113                                                 deferred_flush[i].iova[j]);
2114                         }
2115                         deferred_flush[i].next = 0;
2116                 }
2117         }
2118
2119         list_size = 0;
2120 }
2121
2122 static void flush_unmaps_timeout(unsigned long data)
2123 {
2124         unsigned long flags;
2125
2126         spin_lock_irqsave(&async_umap_flush_lock, flags);
2127         flush_unmaps();
2128         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2129 }
2130
2131 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2132 {
2133         unsigned long flags;
2134         int next, iommu_id;
2135
2136         spin_lock_irqsave(&async_umap_flush_lock, flags);
2137         if (list_size == HIGH_WATER_MARK)
2138                 flush_unmaps();
2139
2140         iommu_id = dom->iommu->seq_id;
2141
2142         next = deferred_flush[iommu_id].next;
2143         deferred_flush[iommu_id].domain[next] = dom;
2144         deferred_flush[iommu_id].iova[next] = iova;
2145         deferred_flush[iommu_id].next++;
2146
2147         if (!timer_on) {
2148                 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2149                 timer_on = 1;
2150         }
2151         list_size++;
2152         spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2153 }
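
/*
 * Note on the deferred scheme above: freed iovas sit in deferred_flush[]
 * until either list_size hits HIGH_WATER_MARK or the 10ms unmap_timer
 * fires.  Until flush_unmaps() runs, stale IOTLB entries can still
 * translate DMA into the just-unmapped range; that window is the cost of
 * batched (non-strict) mode, which intel_unmap_single() below avoids when
 * intel_iommu_strict is set by flushing synchronously.
 */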
2154
2155 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2156                         int dir)
2157 {
2158         struct pci_dev *pdev = to_pci_dev(dev);
2159         struct dmar_domain *domain;
2160         unsigned long start_addr;
2161         struct iova *iova;
2162
2163         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2164                 return;
2165         domain = find_domain(pdev);
2166         BUG_ON(!domain);
2167
2168         iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2169         if (!iova)
2170                 return;
2171
2172         start_addr = iova->pfn_lo << PAGE_SHIFT;
2173         size = aligned_size((u64)dev_addr, size);
2174
2175         pr_debug("Device %s unmapping: %lx@%llx\n",
2176                 pci_name(pdev), size, (unsigned long long)start_addr);
2177
2178         /* clear the ptes for the whole range */
2179         dma_pte_clear_range(domain, start_addr, start_addr + size);
2180         /* free page tables */
2181         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2182         if (intel_iommu_strict) {
2183                 if (iommu_flush_iotlb_psi(domain->iommu,
2184                         domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2185                         iommu_flush_write_buffer(domain->iommu);
2186                 /* free iova */
2187                 __free_iova(&domain->iovad, iova);
2188         } else {
2189                 add_unmap(domain, iova);
2190                 /*
2191                  * queue up the iova release; batching the iotlb flushes
2192                  * saves the ~1/6th of the cpu that per-unmap flushing burns
2193                  */
2194         }
2195 }
2196
2197 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2198                            dma_addr_t *dma_handle, gfp_t flags)
2199 {
2200         void *vaddr;
2201         int order;
2202
2203         size = PAGE_ALIGN(size);
2204         order = get_order(size);
2205         flags &= ~(GFP_DMA | GFP_DMA32);
2206
2207         vaddr = (void *)__get_free_pages(flags, order);
2208         if (!vaddr)
2209                 return NULL;
2210         memset(vaddr, 0, size);
2211
2212         *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2213                                          DMA_BIDIRECTIONAL,
2214                                          hwdev->coherent_dma_mask);
2215         if (*dma_handle)
2216                 return vaddr;
2217         free_pages((unsigned long)vaddr, order);
2218         return NULL;
2219 }
2220
2221 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2222                          dma_addr_t dma_handle)
2223 {
2224         int order;
2225
2226         size = PAGE_ALIGN(size);
2227         order = get_order(size);
2228
2229         intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2230         free_pages((unsigned long)vaddr, order);
2231 }
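
/*
 * Minimal usage sketch (hypothetical driver code, not part of this file):
 * once intel_iommu_init() installs intel_dma_ops, a coherent allocation
 * made through the generic DMA API is serviced by intel_alloc_coherent()
 * above.  The helper name and ring size are made up for illustration.
 */
#if 0
static void *example_alloc_ring(struct pci_dev *pdev, dma_addr_t *dma)
{
	/* dispatches to intel_alloc_coherent() via dma_ops */
	void *ring = dma_alloc_coherent(&pdev->dev, 4096, dma, GFP_KERNEL);

	if (!ring)
		return NULL;
	/* the CPU uses 'ring'; the device is programmed with *dma */
	return ring;
}
#endif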
2232
2233 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2234
2235 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2236                     int nelems, int dir)
2237 {
2238         int i;
2239         struct pci_dev *pdev = to_pci_dev(hwdev);
2240         struct dmar_domain *domain;
2241         unsigned long start_addr;
2242         struct iova *iova;
2243         size_t size = 0;
2244         void *addr;
2245         struct scatterlist *sg;
2246
2247         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2248                 return;
2249
2250         domain = find_domain(pdev);
2251
2252         iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2253         if (!iova)
2254                 return;
2255         for_each_sg(sglist, sg, nelems, i) {
2256                 addr = SG_ENT_VIRT_ADDRESS(sg);
2257                 size += aligned_size((u64)addr, sg->length);
2258         }
2259
2260         start_addr = iova->pfn_lo << PAGE_SHIFT;
2261
2262         /* clear the ptes for the whole range */
2263         dma_pte_clear_range(domain, start_addr, start_addr + size);
2264         /* free page tables */
2265         dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2266
2267         if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2268                         size >> VTD_PAGE_SHIFT, 0))
2269                 iommu_flush_write_buffer(domain->iommu);
2270
2271         /* free iova */
2272         __free_iova(&domain->iovad, iova);
2273 }
2274
2275 static int intel_nontranslate_map_sg(struct device *hwdev,
2276         struct scatterlist *sglist, int nelems, int dir)
2277 {
2278         int i;
2279         struct scatterlist *sg;
2280
2281         for_each_sg(sglist, sg, nelems, i) {
2282                 BUG_ON(!sg_page(sg));
2283                 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2284                 sg->dma_length = sg->length;
2285         }
2286         return nelems;
2287 }
2288
2289 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2290                  int dir)
2291 {
2292         void *addr;
2293         int i;
2294         struct pci_dev *pdev = to_pci_dev(hwdev);
2295         struct dmar_domain *domain;
2296         size_t size = 0;
2297         int prot = 0;
2298         size_t offset = 0;
2299         struct iova *iova = NULL;
2300         int ret;
2301         struct scatterlist *sg;
2302         unsigned long start_addr;
2303
2304         BUG_ON(dir == DMA_NONE);
2305         if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2306                 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2307
2308         domain = get_valid_domain_for_dev(pdev);
2309         if (!domain)
2310                 return 0;
2311
2312         for_each_sg(sglist, sg, nelems, i) {
2313                 addr = SG_ENT_VIRT_ADDRESS(sg);
2314                 addr = (void *)virt_to_phys(addr);
2315                 size += aligned_size((u64)addr, sg->length);
2316         }
2317
2318         iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2319         if (!iova) {
2320                 sglist->dma_length = 0;
2321                 return 0;
2322         }
2323
2324         /*
2325          * Check if DMAR supports zero-length reads on write only
2326          * mappings..
2327          */
2328         if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2329                         !cap_zlr(domain->iommu->cap))
2330                 prot |= DMA_PTE_READ;
2331         if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2332                 prot |= DMA_PTE_WRITE;
2333
2334         start_addr = iova->pfn_lo << PAGE_SHIFT;
2335         offset = 0;
2336         for_each_sg(sglist, sg, nelems, i) {
2337                 addr = SG_ENT_VIRT_ADDRESS(sg);
2338                 addr = (void *)virt_to_phys(addr);
2339                 size = aligned_size((u64)addr, sg->length);
2340                 ret = domain_page_mapping(domain, start_addr + offset,
2341                         ((u64)addr) & PAGE_MASK,
2342                         size, prot);
2343                 if (ret) {
2344                         /* clear the ptes mapped so far */
2345                         dma_pte_clear_range(domain, start_addr,
2346                                   start_addr + offset);
2347                         /* free page tables */
2348                         dma_pte_free_pagetable(domain, start_addr,
2349                                   start_addr + offset);
2350                         /* free iova */
2351                         __free_iova(&domain->iovad, iova);
2352                         return 0;
2353                 }
2354                 sg->dma_address = start_addr + offset +
2355                                 ((u64)addr & (~PAGE_MASK));
2356                 sg->dma_length = sg->length;
2357                 offset += size;
2358         }
2359
2360         /* it's a non-present to present mapping */
2361         if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2362                         start_addr, offset >> VTD_PAGE_SHIFT, 1))
2363                 iommu_flush_write_buffer(domain->iommu);
2364         return nelems;
2365 }
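
/*
 * Minimal scatter-gather usage sketch (hypothetical, for illustration
 * only): dma_map_sg()/dma_unmap_sg() reach intel_map_sg()/intel_unmap_sg()
 * above through dma_ops.  The two-page buffer and helper name are made up.
 */
#if 0
static int example_map_two_pages(struct pci_dev *pdev, struct page *p0,
				 struct page *p1)
{
	struct scatterlist sgl[2];
	int nents;

	sg_init_table(sgl, 2);
	sg_set_page(&sgl[0], p0, PAGE_SIZE, 0);
	sg_set_page(&sgl[1], p1, PAGE_SIZE, 0);

	nents = dma_map_sg(&pdev->dev, sgl, 2, DMA_FROM_DEVICE);
	if (!nents)
		return -ENOMEM;

	/* program the device with sg_dma_address()/sg_dma_len() of each entry */

	dma_unmap_sg(&pdev->dev, sgl, 2, DMA_FROM_DEVICE);
	return 0;
}
#endif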
2366
2367 static struct dma_mapping_ops intel_dma_ops = {
2368         .alloc_coherent = intel_alloc_coherent,
2369         .free_coherent = intel_free_coherent,
2370         .map_single = intel_map_single,
2371         .unmap_single = intel_unmap_single,
2372         .map_sg = intel_map_sg,
2373         .unmap_sg = intel_unmap_sg,
2374 };
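
/*
 * Minimal streaming-DMA usage sketch (hypothetical, for illustration only):
 * after intel_iommu_init() sets dma_ops = &intel_dma_ops, a driver's plain
 * dma_map_single()/dma_unmap_single() calls are routed to
 * intel_map_single()/intel_unmap_single() above.  The helper name and
 * buffer are made up.
 */
#if 0
static int example_send_buffer(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle;

	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	if (!handle)		/* intel_map_single() returns 0 on failure */
		return -ENOMEM;

	/* hand 'handle' to the device and wait for the transfer to finish */

	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
	return 0;
}
#endif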
2375
2376 static inline int iommu_domain_cache_init(void)
2377 {
2378         int ret = 0;
2379
2380         iommu_domain_cache = kmem_cache_create("iommu_domain",
2381                                          sizeof(struct dmar_domain),
2382                                          0,
2383                                          SLAB_HWCACHE_ALIGN,
2385                                          NULL);
2386         if (!iommu_domain_cache) {
2387                 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2388                 ret = -ENOMEM;
2389         }
2390
2391         return ret;
2392 }
2393
2394 static inline int iommu_devinfo_cache_init(void)
2395 {
2396         int ret = 0;
2397
2398         iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2399                                          sizeof(struct device_domain_info),
2400                                          0,
2401                                          SLAB_HWCACHE_ALIGN,
2402                                          NULL);
2403         if (!iommu_devinfo_cache) {
2404                 printk(KERN_ERR "Couldn't create devinfo cache\n");
2405                 ret = -ENOMEM;
2406         }
2407
2408         return ret;
2409 }
2410
2411 static inline int iommu_iova_cache_init(void)
2412 {
2413         int ret = 0;
2414
2415         iommu_iova_cache = kmem_cache_create("iommu_iova",
2416                                          sizeof(struct iova),
2417                                          0,
2418                                          SLAB_HWCACHE_ALIGN,
2419                                          NULL);
2420         if (!iommu_iova_cache) {
2421                 printk(KERN_ERR "Couldn't create iova cache\n");
2422                 ret = -ENOMEM;
2423         }
2424
2425         return ret;
2426 }
2427
2428 static int __init iommu_init_mempool(void)
2429 {
2430         int ret;
2431         ret = iommu_iova_cache_init();
2432         if (ret)
2433                 return ret;
2434
2435         ret = iommu_domain_cache_init();
2436         if (ret)
2437                 goto domain_error;
2438
2439         ret = iommu_devinfo_cache_init();
2440         if (!ret)
2441                 return ret;
2442
2443         kmem_cache_destroy(iommu_domain_cache);
2444 domain_error:
2445         kmem_cache_destroy(iommu_iova_cache);
2446
2447         return -ENOMEM;
2448 }
2449
2450 static void __init iommu_exit_mempool(void)
2451 {
2452         kmem_cache_destroy(iommu_devinfo_cache);
2453         kmem_cache_destroy(iommu_domain_cache);
2454         kmem_cache_destroy(iommu_iova_cache);
2455
2456 }
2457
2458 static void __init init_no_remapping_devices(void)
2459 {
2460         struct dmar_drhd_unit *drhd;
2461
2462         for_each_drhd_unit(drhd) {
2463                 if (!drhd->include_all) {
2464                         int i;
2465                         for (i = 0; i < drhd->devices_cnt; i++)
2466                                 if (drhd->devices[i] != NULL)
2467                                         break;
2468                         /* ignore the DMAR unit if no PCI devices exist under it */
2469                         if (i == drhd->devices_cnt)
2470                                 drhd->ignored = 1;
2471                 }
2472         }
2473
2474         if (dmar_map_gfx)
2475                 return;
2476
2477         for_each_drhd_unit(drhd) {
2478                 int i;
2479                 if (drhd->ignored || drhd->include_all)
2480                         continue;
2481
2482                 for (i = 0; i < drhd->devices_cnt; i++)
2483                         if (drhd->devices[i] &&
2484                                 !IS_GFX_DEVICE(drhd->devices[i]))
2485                                 break;
2486
2487                 if (i < drhd->devices_cnt)
2488                         continue;
2489
2490                 /* bypass IOMMU if it is just for gfx devices */
2491                 drhd->ignored = 1;
2492                 for (i = 0; i < drhd->devices_cnt; i++) {
2493                         if (!drhd->devices[i])
2494                                 continue;
2495                         drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2496                 }
2497         }
2498 }
2499
2500 int __init intel_iommu_init(void)
2501 {
2502         int ret = 0;
2503
2504         if (dmar_table_init())
2505                 return  -ENODEV;
2506
2507         if (dmar_dev_scope_init())
2508                 return  -ENODEV;
2509
2510         /*
2511          * Check the need for DMA-remapping initialization now.
2512          * The initialization above is also used by interrupt remapping.
2513          */
2514         if (no_iommu || swiotlb || dmar_disabled)
2515                 return -ENODEV;
2516
2517         iommu_init_mempool();
2518         dmar_init_reserved_ranges();
2519
2520         init_no_remapping_devices();
2521
2522         ret = init_dmars();
2523         if (ret) {
2524                 printk(KERN_ERR "IOMMU: dmar init failed\n");
2525                 put_iova_domain(&reserved_iova_list);
2526                 iommu_exit_mempool();
2527                 return ret;
2528         }
2529         printk(KERN_INFO
2530         "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2531
2532         init_timer(&unmap_timer);
2533         force_iommu = 1;
2534         dma_ops = &intel_dma_ops;
2535         return 0;
2536 }
2537
2538 void intel_iommu_domain_exit(struct dmar_domain *domain)
2539 {
2540         u64 end;
2541
2542         /* Domain 0 is reserved, so don't process it */
2543         if (!domain)
2544                 return;
2545
2546         end = DOMAIN_MAX_ADDR(domain->gaw);
2547         end = end & (~VTD_PAGE_MASK);
2548
2549         /* clear ptes */
2550         dma_pte_clear_range(domain, 0, end);
2551
2552         /* free page tables */
2553         dma_pte_free_pagetable(domain, 0, end);
2554
2555         iommu_free_domain(domain);
2556         free_domain_mem(domain);
2557 }
2558 EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2559
2560 struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2561 {
2562         struct dmar_drhd_unit *drhd;
2563         struct dmar_domain *domain;
2564         struct intel_iommu *iommu;
2565
2566         drhd = dmar_find_matched_drhd_unit(pdev);
2567         if (!drhd) {
2568                 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2569                 return NULL;
2570         }
2571
2572         iommu = drhd->iommu;
2573         if (!iommu) {
2574                 printk(KERN_ERR
2575                         "intel_iommu_domain_alloc: iommu == NULL\n");
2576                 return NULL;
2577         }
2578         domain = iommu_alloc_domain(iommu);
2579         if (!domain) {
2580                 printk(KERN_ERR
2581                         "intel_iommu_domain_alloc: domain == NULL\n");
2582                 return NULL;
2583         }
2584         if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2585                 printk(KERN_ERR
2586                         "intel_iommu_domain_alloc: domain_init() failed\n");
2587                 intel_iommu_domain_exit(domain);
2588                 return NULL;
2589         }
2590         return domain;
2591 }
2592 EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2593
2594 int intel_iommu_context_mapping(
2595         struct dmar_domain *domain, struct pci_dev *pdev)
2596 {
2597         int rc;
2598         rc = domain_context_mapping(domain, pdev);
2599         return rc;
2600 }
2601 EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2602
2603 int intel_iommu_page_mapping(
2604         struct dmar_domain *domain, dma_addr_t iova,
2605         u64 hpa, size_t size, int prot)
2606 {
2607         int rc;
2608         rc = domain_page_mapping(domain, iova, hpa, size, prot);
2609         return rc;
2610 }
2611 EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2612
2613 void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2614 {
2615         detach_domain_for_dev(domain, bus, devfn);
2616 }
2617 EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2618
2619 struct dmar_domain *
2620 intel_iommu_find_domain(struct pci_dev *pdev)
2621 {
2622         return find_domain(pdev);
2623 }
2624 EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2625
2626 int intel_iommu_found(void)
2627 {
2628         return g_num_of_iommus;
2629 }
2630 EXPORT_SYMBOL_GPL(intel_iommu_found);
2631
2632 u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2633 {
2634         struct dma_pte *pte;
2635         u64 pfn;
2636
2637         pfn = 0;
2638         pte = addr_to_dma_pte(domain, iova);
2639
2640         if (pte)
2641                 pfn = dma_pte_addr(pte);
2642
2643         return pfn >> VTD_PAGE_SHIFT;
2644 }
2645 EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);
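
/*
 * Usage sketch for the exported helpers above (hypothetical caller such as
 * a device-assignment module; the IOVA value is made up): allocate a domain
 * for a device, map its context, install an IOVA -> host-physical
 * translation, and look it up again.
 */
#if 0
static int example_assign_device(struct pci_dev *pdev)
{
	struct dmar_domain *domain;
	u64 pfn;

	domain = intel_iommu_domain_alloc(pdev);
	if (!domain)
		return -ENODEV;

	if (intel_iommu_context_mapping(domain, pdev))
		goto out_free;

	/* map one page at IOVA 1MiB to host physical 1MiB, read/write */
	if (intel_iommu_page_mapping(domain, 0x100000, 0x100000, PAGE_SIZE,
				     DMA_PTE_READ | DMA_PTE_WRITE))
		goto out_detach;

	pfn = intel_iommu_iova_to_pfn(domain, 0x100000);
	/* pfn now names the host page frame backing IOVA 0x100000 */
	return 0;

out_detach:
	intel_iommu_detach_dev(domain, pdev->bus->number, pdev->devfn);
out_free:
	intel_iommu_domain_exit(domain);
	return -EIO;
}
#endif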