Subversion Repositories Kolibri OS


  1. /*
  2.  * Copyright © 2010 Daniel Vetter
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  */
  24.  
  25.  
  26. #define AGP_NORMAL_MEMORY 0
  27.  
  28. #define AGP_USER_TYPES (1 << 16)
  29. #define AGP_USER_MEMORY (AGP_USER_TYPES)
  30. #define AGP_USER_CACHED_MEMORY (AGP_USER_TYPES + 1)
  31.  
  32. #include <drm/drmP.h>
  33. #include <drm/i915_drm.h>
  34. #include "i915_drv.h"
  35. #include "i915_trace.h"
  36. #include "intel_drv.h"
  37.  
  38. #define GEN6_PPGTT_PD_ENTRIES 512
  39. #define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
  40. typedef uint64_t gen8_gtt_pte_t;
  41. typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
  42.  
  43. /* PPGTT stuff */
  44. #define GEN6_GTT_ADDR_ENCODE(addr)      ((addr) | (((addr) >> 28) & 0xff0))
  45. #define HSW_GTT_ADDR_ENCODE(addr)       ((addr) | (((addr) >> 28) & 0x7f0))
  46.  
  47. #define GEN6_PDE_VALID                  (1 << 0)
  48. /* gen6+ has bit 11-4 for physical addr bit 39-32 */
  49. #define GEN6_PDE_ADDR_ENCODE(addr)      GEN6_GTT_ADDR_ENCODE(addr)
  50.  
  51. #define GEN6_PTE_VALID                  (1 << 0)
  52. #define GEN6_PTE_UNCACHED               (1 << 1)
  53. #define HSW_PTE_UNCACHED                (0)
  54. #define GEN6_PTE_CACHE_LLC              (2 << 1)
  55. #define GEN7_PTE_CACHE_L3_LLC           (3 << 1)
  56. #define GEN6_PTE_ADDR_ENCODE(addr)      GEN6_GTT_ADDR_ENCODE(addr)
  57. #define HSW_PTE_ADDR_ENCODE(addr)       HSW_GTT_ADDR_ENCODE(addr)
  58.  
  59. /* Cacheability Control is a 4-bit value. The low three bits are stored in *
  60.  * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
  61.  */
  62. #define HSW_CACHEABILITY_CONTROL(bits)  ((((bits) & 0x7) << 1) | \
  63.                                          (((bits) & 0x8) << (11 - 3)))
  64. #define HSW_WB_LLC_AGE3                 HSW_CACHEABILITY_CONTROL(0x2)
  65. #define HSW_WB_LLC_AGE0                 HSW_CACHEABILITY_CONTROL(0x3)
  66. #define HSW_WB_ELLC_LLC_AGE0            HSW_CACHEABILITY_CONTROL(0xb)
  67. #define HSW_WB_ELLC_LLC_AGE3            HSW_CACHEABILITY_CONTROL(0x8)
  68. #define HSW_WT_ELLC_LLC_AGE0            HSW_CACHEABILITY_CONTROL(0x6)
  69. #define HSW_WT_ELLC_LLC_AGE3            HSW_CACHEABILITY_CONTROL(0x7)
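/* Worked example of the encoding above: HSW_CACHEABILITY_CONTROL(0xb)
 * expands to ((0xb & 0x7) << 1) | ((0xb & 0x8) << (11 - 3)) = 0x6 | 0x800
 * = 0x806 -- the low three bits of the 4-bit value land in PTE bits 3:1 and
 * the fourth bit in PTE bit 11, as used by HSW_WB_ELLC_LLC_AGE0.
 */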
  70.  
  71. #define GEN8_PTES_PER_PAGE              (PAGE_SIZE / sizeof(gen8_gtt_pte_t))
  72. #define GEN8_PDES_PER_PAGE              (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
  73. #define GEN8_LEGACY_PDPS                4
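/* With 4 KiB pages and 8-byte gen8 PTEs/PDEs, both of the counts above work
 * out to 512 entries per page, so the legacy layout gives
 * 4 PDPs * 512 PDEs * 512 PTEs * 4 KiB = 4 GiB of addressable space
 * (see the comment above gen8_ppgtt_init()).
 */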
  74.  
  75. #define PPAT_UNCACHED_INDEX             (_PAGE_PWT | _PAGE_PCD)
  76. #define PPAT_CACHED_PDE_INDEX           0 /* WB LLC */
  77. #define PPAT_CACHED_INDEX               _PAGE_PAT /* WB LLCeLLC */
  78. #define PPAT_DISPLAY_ELLC_INDEX         _PAGE_PCD /* WT eLLC */
  79.  
  80. static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
  81.                                              enum i915_cache_level level,
  82.                                              bool valid)
  83. {
  84.         gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
  85.         pte |= addr;
  86.         if (level != I915_CACHE_NONE)
  87.                 pte |= PPAT_CACHED_INDEX;
  88.         else
  89.                 pte |= PPAT_UNCACHED_INDEX;
  90.         return pte;
  91. }
  92.  
  93. static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
  94.                                              dma_addr_t addr,
  95.                                              enum i915_cache_level level)
  96. {
  97.         gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
  98.         pde |= addr;
  99.         if (level != I915_CACHE_NONE)
  100.                 pde |= PPAT_CACHED_PDE_INDEX;
  101.         else
  102.                 pde |= PPAT_UNCACHED_INDEX;
  103.         return pde;
  104. }
  105.  
  106. static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
  107.                                      enum i915_cache_level level,
  108.                                      bool valid)
  109. {
  110.         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  111.         pte |= GEN6_PTE_ADDR_ENCODE(addr);
  112.  
  113.         switch (level) {
  114.         case I915_CACHE_L3_LLC:
  115.         case I915_CACHE_LLC:
  116.                 pte |= GEN6_PTE_CACHE_LLC;
  117.                 break;
  118.         case I915_CACHE_NONE:
  119.                 pte |= GEN6_PTE_UNCACHED;
  120.                 break;
  121.         default:
  122.                 WARN_ON(1);
  123.         }
  124.  
  125.         return pte;
  126. }
  127.  
  128. static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
  129.                                      enum i915_cache_level level,
  130.                                      bool valid)
  131. {
  132.         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  133.         pte |= GEN6_PTE_ADDR_ENCODE(addr);
  134.  
  135.         switch (level) {
  136.         case I915_CACHE_L3_LLC:
  137.                 pte |= GEN7_PTE_CACHE_L3_LLC;
  138.                 break;
  139.         case I915_CACHE_LLC:
  140.                 pte |= GEN6_PTE_CACHE_LLC;
  141.                 break;
  142.         case I915_CACHE_NONE:
  143.                 pte |= GEN6_PTE_UNCACHED;
  144.                 break;
  145.         default:
  146.                 WARN_ON(1);
  147.         }
  148.  
  149.         return pte;
  150. }
  151.  
  152. #define BYT_PTE_WRITEABLE               (1 << 1)
  153. #define BYT_PTE_SNOOPED_BY_CPU_CACHES   (1 << 2)
  154.  
  155. static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
  156.                                      enum i915_cache_level level,
  157.                                      bool valid)
  158. {
  159.         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  160.         pte |= GEN6_PTE_ADDR_ENCODE(addr);
  161.  
  162.         /* Mark the page as writeable.  Other platforms don't have a
  163.          * setting for read-only/writable, so this matches that behavior.
  164.          */
  165.         pte |= BYT_PTE_WRITEABLE;
  166.  
  167.         if (level != I915_CACHE_NONE)
  168.                 pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
  169.  
  170.         return pte;
  171. }
  172.  
  173. static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
  174.                                      enum i915_cache_level level,
  175.                                      bool valid)
  176. {
  177.         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  178.         pte |= HSW_PTE_ADDR_ENCODE(addr);
  179.  
  180.         if (level != I915_CACHE_NONE)
  181.                 pte |= HSW_WB_LLC_AGE3;
  182.  
  183.         return pte;
  184. }
  185.  
  186. static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
  187.                                       enum i915_cache_level level,
  188.                                       bool valid)
  189. {
  190.         gen6_gtt_pte_t pte = valid ? GEN6_PTE_VALID : 0;
  191.         pte |= HSW_PTE_ADDR_ENCODE(addr);
  192.  
  193.         switch (level) {
  194.         case I915_CACHE_NONE:
  195.                 break;
  196.         case I915_CACHE_WT:
  197.                 pte |= HSW_WT_ELLC_LLC_AGE3;
  198.                 break;
  199.         default:
  200.                 pte |= HSW_WB_ELLC_LLC_AGE3;
  201.                 break;
  202.         }
  203.  
  204.         return pte;
  205. }
  206.  
  207. /* Broadwell Page Directory Pointer Descriptors */
  208. static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
  209.                            uint64_t val)
  210. {
  211.         int ret;
  212.  
  213.         BUG_ON(entry >= 4);
  214.  
  215.         ret = intel_ring_begin(ring, 6);
  216.         if (ret)
  217.                 return ret;
  218.  
  219.         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
  220.         intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
  221.         intel_ring_emit(ring, (u32)(val >> 32));
  222.         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
  223.         intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
  224.         intel_ring_emit(ring, (u32)(val));
  225.         intel_ring_advance(ring);
  226.  
  227.         return 0;
  228. }
  229.  
  230. static int gen8_ppgtt_enable(struct drm_device *dev)
  231. {
  232.         struct drm_i915_private *dev_priv = dev->dev_private;
  233.         struct intel_ring_buffer *ring;
  234.         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
  235.         int i, j, ret;
  236.  
  237.         /* bit of a hack to find the actual last used pd */
  238.         int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
  239.  
  240.         for_each_ring(ring, dev_priv, j) {
  241.                 I915_WRITE(RING_MODE_GEN7(ring),
  242.                            _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
  243.         }
  244.  
  245.         for (i = used_pd - 1; i >= 0; i--) {
  246.                 dma_addr_t addr = ppgtt->pd_dma_addr[i];
  247.                 for_each_ring(ring, dev_priv, j) {
  248.                         ret = gen8_write_pdp(ring, i, addr);
  249.                         if (ret)
  250.                                 goto err_out;
  251.                 }
  252.         }
  253.         return 0;
  254.  
  255. err_out:
  256.         for_each_ring(ring, dev_priv, j)
  257.                 I915_WRITE(RING_MODE_GEN7(ring),
  258.                            _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
  259.         return ret;
  260. }
  261.  
  262. static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
  263.                                    unsigned first_entry,
  264.                                    unsigned num_entries,
  265.                                    bool use_scratch)
  266. {
  267.         struct i915_hw_ppgtt *ppgtt =
  268.                 container_of(vm, struct i915_hw_ppgtt, base);
  269.         gen8_gtt_pte_t *pt_vaddr, scratch_pte;
  270.         unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
  271.         unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
  272.         unsigned last_pte, i;
  273.  
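    /* KolibriOS-specific mapping helpers stand in for the kernel's
     * kmap_atomic() here: AllocKernelSpace(4096) is assumed to reserve a
     * one-page kernel virtual window, MapPage() to point that window at the
     * physical page-table page (the final argument presumably being the
     * present/writable page flags), and FreeKernelSpace() to release the
     * window again.  The same pattern recurs in the other clear/insert
     * helpers below.
     */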
  274.     pt_vaddr = (gen8_gtt_pte_t*)AllocKernelSpace(4096);
  275.     if(pt_vaddr == NULL)
  276.         return;
  277.  
  278.     scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
  279.                       I915_CACHE_LLC, use_scratch);
  280.  
  281.         while (num_entries) {
  282.                 struct page *page_table = &ppgtt->gen8_pt_pages[act_pt];
  283.  
  284.                 last_pte = first_pte + num_entries;
  285.                 if (last_pte > GEN8_PTES_PER_PAGE)
  286.                         last_pte = GEN8_PTES_PER_PAGE;
  287.  
  288.         MapPage(pt_vaddr,(addr_t)(ppgtt->pt_pages[act_pt]), 3);
  289.  
  290.                 for (i = first_pte; i < last_pte; i++)
  291.                         pt_vaddr[i] = scratch_pte;
  292.  
  293.                 num_entries -= last_pte - first_pte;
  294.                 first_pte = 0;
  295.                 act_pt++;
  296.         }
  297.     FreeKernelSpace(pt_vaddr);
  298. }
  299.  
  300. static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
  301.                                       struct sg_table *pages,
  302.                                       unsigned first_entry,
  303.                                       enum i915_cache_level cache_level)
  304. {
  305.         struct i915_hw_ppgtt *ppgtt =
  306.                 container_of(vm, struct i915_hw_ppgtt, base);
  307.         gen8_gtt_pte_t *pt_vaddr;
  308.         unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
  309.         unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
  310.         struct sg_page_iter sg_iter;
  311.  
  312.     pt_vaddr = AllocKernelSpace(4096);
  313.     if(pt_vaddr == NULL)
  314.         return;
  315.  
  316.     MapPage(pt_vaddr,(addr_t)(ppgtt->pt_pages[act_pt]), 3);
  317.  
  318.         for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
  319.  
  320.                 pt_vaddr[act_pte] =
  321.                         gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
  322.                                         cache_level, true);
  323.                 if (++act_pte == GEN8_PTES_PER_PAGE) {
  324.                         act_pt++;
  325.             MapPage(pt_vaddr,(addr_t)(ppgtt->pt_pages[act_pt]), 3);
  326.                         act_pte = 0;
  327.                 }
  328.         }
  329.     FreeKernelSpace(pt_vaddr);
  330. }
  331.  
  332. static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
  333. {
  334.         struct i915_hw_ppgtt *ppgtt =
  335.                 container_of(vm, struct i915_hw_ppgtt, base);
  336.         int i, j;
  337.  
  338.         drm_mm_takedown(&vm->mm);
  339.  
  340.         for (i = 0; i < ppgtt->num_pd_pages ; i++) {
  341.                 if (ppgtt->pd_dma_addr[i]) {
  342.                         pci_unmap_page(ppgtt->base.dev->pdev,
  343.                                        ppgtt->pd_dma_addr[i],
  344.                                        PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
  345.  
  346.                         for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
  347.                                 dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
  348.                                 if (addr)
  349.                                         pci_unmap_page(ppgtt->base.dev->pdev,
  350.                                                        addr,
  351.                                                        PAGE_SIZE,
  352.                                                        PCI_DMA_BIDIRECTIONAL);
  353.  
  354.                         }
  355.                 }
  356.                 kfree(ppgtt->gen8_pt_dma_addr[i]);
  357.         }
  358.  
  359. //   __free_pages(ppgtt->gen8_pt_pages, get_order(ppgtt->num_pt_pages << PAGE_SHIFT));
  360. //   __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
  361. }
  362.  
  363. /**
  364.  * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a
  365.  * net effect resembling a 2-level page table in normal x86 terms. Each PDP
  366.  * represents 1GB of memory
  367.  * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space.
  368.  *
  369.  * TODO: Do something with the size parameter
  370.  **/
  371. static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
  372. {
  373.         struct page *pt_pages;
  374.         int i, j, ret = -ENOMEM;
  375.         const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
  376.         const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
  377.  
  378.         if (size % (1<<30))
  379.                 DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
  380.  
  381.         /* FIXME: split allocation into smaller pieces. For now we only ever do
  382.          * this once, but with full PPGTT, the multiple contiguous allocations
  383.          * will be bad.
  384.          */
  385.     ppgtt->pd_pages = AllocPages(max_pdp);
  386.         if (!ppgtt->pd_pages)
  387.                 return -ENOMEM;
  388.  
  389.     pt_pages = AllocPages(num_pt_pages);
  390.         if (!pt_pages) {
  391. //       __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
  392.                 return -ENOMEM;
  393.         }
  394.  
  395.         ppgtt->gen8_pt_pages = pt_pages;
  396.     ppgtt->num_pd_pages = max_pdp;
  397.     ppgtt->num_pt_pages = num_pt_pages;
  398.         ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
  399.         ppgtt->enable = gen8_ppgtt_enable;
  400.         ppgtt->base.clear_range = gen8_ppgtt_clear_range;
  401.         ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
  402.         ppgtt->base.cleanup = gen8_ppgtt_cleanup;
  403.         ppgtt->base.start = 0;
  404.         ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE;
  405.  
  406.         BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
  407.  
  408.         /*
  409.          * - Create a mapping for the page directories.
  410.          * - For each page directory:
  411.          *      allocate space for page table mappings.
  412.          *      map each page table
  413.          */
  414.         for (i = 0; i < max_pdp; i++) {
  415.                 dma_addr_t temp;
  416.                 temp = pci_map_page(ppgtt->base.dev->pdev,
  417.                                     &ppgtt->pd_pages[i], 0,
  418.                                     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
  419.  
  420.                 ppgtt->pd_dma_addr[i] = temp;
  421.  
  422.                 ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, GFP_KERNEL);
  423.                 if (!ppgtt->gen8_pt_dma_addr[i])
  424.                         goto err_out;
  425.  
  426.                 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
  427.                         struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j];
  428.                         temp = pci_map_page(ppgtt->base.dev->pdev,
  429.                                             p, 0, PAGE_SIZE,
  430.                                             PCI_DMA_BIDIRECTIONAL);
  431.  
  432.             ppgtt->gen8_pt_dma_addr[i][j] = temp;
  433.                 }
  434.         }
  435.  
  436.         /* For now, the PPGTT helper functions all require that the PDEs are
  437.          * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
  438.          * will never need to touch the PDEs again */
  439.  
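    /* Each PDE written below comes from gen8_pde_encode(): the page table's
     * DMA address OR'd with _PAGE_PRESENT | _PAGE_RW and, for I915_CACHE_LLC,
     * the write-back PPAT index (PPAT_CACHED_PDE_INDEX, which is 0).
     */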
  440.     gen8_ppgtt_pde_t *pd_vaddr;
  441.     pd_vaddr = AllocKernelSpace(4096);
  442.  
  443.     for (i = 0; i < max_pdp; i++) {
  444.         MapPage(pd_vaddr,(addr_t)(ppgtt->pd_pages[i]), 3);
  445.                 for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
  446.                         dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
  447.                         pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
  448.                                                       I915_CACHE_LLC);
  449.                 }
  450.         }
  451.     FreeKernelSpace(pd_vaddr);
  452.  
  453.         ppgtt->base.clear_range(&ppgtt->base, 0,
  454.                                 ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE,
  455.                                 true);
  456.  
  457.         DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
  458.                          ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
  459.         DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
  460.                          ppgtt->num_pt_pages,
  461.                          (ppgtt->num_pt_pages - num_pt_pages) +
  462.                          size % (1<<30));
  463.         return 0;
  464.  
  465. err_out:
  466.         ppgtt->base.cleanup(&ppgtt->base);
  467.         return ret;
  468. }
  469.  
  470. static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
  471. {
  472.         struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
  473.         gen6_gtt_pte_t __iomem *pd_addr;
  474.         uint32_t pd_entry;
  475.         int i;
  476.  
  477.         WARN_ON(ppgtt->pd_offset & 0x3f);
  478.         pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
  479.                 ppgtt->pd_offset / sizeof(gen6_gtt_pte_t);
  480.         for (i = 0; i < ppgtt->num_pd_entries; i++) {
  481.                 dma_addr_t pt_addr;
  482.  
  483.                 pt_addr = ppgtt->pt_dma_addr[i];
  484.                 pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
  485.                 pd_entry |= GEN6_PDE_VALID;
  486.  
  487.                 writel(pd_entry, pd_addr + i);
  488.         }
  489.         readl(pd_addr);
  490. }
  491.  
  492. static int gen6_ppgtt_enable(struct drm_device *dev)
  493. {
  494.         drm_i915_private_t *dev_priv = dev->dev_private;
  495.         uint32_t pd_offset;
  496.         struct intel_ring_buffer *ring;
  497.         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
  498.         int i;
  499.  
  500.         BUG_ON(ppgtt->pd_offset & 0x3f);
  501.  
  502.         gen6_write_pdes(ppgtt);
  503.  
  504.         pd_offset = ppgtt->pd_offset;
  505.         pd_offset /= 64; /* in cachelines */
  506.         pd_offset <<= 16;
  507.  
  508.         if (INTEL_INFO(dev)->gen == 6) {
  509.                 uint32_t ecochk, gab_ctl, ecobits;
  510.  
  511.                 ecobits = I915_READ(GAC_ECO_BITS);
  512.                 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
  513.                                          ECOBITS_PPGTT_CACHE64B);
  514.  
  515.                 gab_ctl = I915_READ(GAB_CTL);
  516.                 I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
  517.  
  518.                 ecochk = I915_READ(GAM_ECOCHK);
  519.                 I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
  520.                                        ECOCHK_PPGTT_CACHE64B);
  521.                 I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
  522.         } else if (INTEL_INFO(dev)->gen >= 7) {
  523.                 uint32_t ecochk, ecobits;
  524.  
  525.                 ecobits = I915_READ(GAC_ECO_BITS);
  526.                 I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
  527.  
  528.                 ecochk = I915_READ(GAM_ECOCHK);
  529.                 if (IS_HASWELL(dev)) {
  530.                         ecochk |= ECOCHK_PPGTT_WB_HSW;
  531.                 } else {
  532.                         ecochk |= ECOCHK_PPGTT_LLC_IVB;
  533.                         ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
  534.                 }
  535.                 I915_WRITE(GAM_ECOCHK, ecochk);
  536.                 /* GFX_MODE is per-ring on gen7+ */
  537.         }
  538.  
  539.         for_each_ring(ring, dev_priv, i) {
  540.                 if (INTEL_INFO(dev)->gen >= 7)
  541.                         I915_WRITE(RING_MODE_GEN7(ring),
  542.                                    _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
  543.  
  544.                 I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
  545.                 I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
  546.         }
  547.         return 0;
  548. }
  549.  
  550. /* PPGTT support for Sandybridge/Gen6 and later */
  551. static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
  552.                                    unsigned first_entry,
  553.                                    unsigned num_entries,
  554.                                    bool use_scratch)
  555. {
  556.         struct i915_hw_ppgtt *ppgtt =
  557.                 container_of(vm, struct i915_hw_ppgtt, base);
  558.         gen6_gtt_pte_t *pt_vaddr, scratch_pte;
  559.         unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
  560.         unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
  561.         unsigned last_pte, i;
  562.  
  563.         scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, true);
  564.  
  565.     pt_vaddr = AllocKernelSpace(4096);
  566.  
  567.     if(pt_vaddr == NULL)
  568.         return;
  569.  
  570.         while (num_entries) {
  571.             last_pte = first_pte + num_entries;
  572.             if (last_pte > I915_PPGTT_PT_ENTRIES)
  573.                 last_pte = I915_PPGTT_PT_ENTRIES;
  574.  
  575.             MapPage(pt_vaddr,(addr_t)(ppgtt->pt_pages[act_pt]), 3);
  576.  
  577.             for (i = first_pte; i < last_pte; i++)
  578.                 pt_vaddr[i] = scratch_pte;
  579.  
  580.             num_entries -= last_pte - first_pte;
  581.             first_pte = 0;
  582.             act_pt++;
  583.         }
  584.  
  585.     FreeKernelSpace(pt_vaddr);
  586. }
  587.  
  588. static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
  589.                                       struct sg_table *pages,
  590.                                       unsigned first_entry,
  591.                                       enum i915_cache_level cache_level)
  592. {
  593.         struct i915_hw_ppgtt *ppgtt =
  594.                 container_of(vm, struct i915_hw_ppgtt, base);
  595.         gen6_gtt_pte_t *pt_vaddr;
  596.         unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
  597.         unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
  598.         struct sg_page_iter sg_iter;
  599.         dma_addr_t page_addr;
  600.  
  601.  
  602.     pt_vaddr = AllocKernelSpace(4096);
  603.  
  604.     if(pt_vaddr == NULL)
  605.         return;
  606.  
  607.     MapPage(pt_vaddr,(addr_t)(ppgtt->pt_pages[act_pt]), 3);
  608.         for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
  609.  
  610.                 pt_vaddr[act_pte] =
  611.                         vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
  612.                                        cache_level, true);
  613.                 if (++act_pte == I915_PPGTT_PT_ENTRIES) {
  614.                         act_pt++;
  615.                         MapPage(pt_vaddr,(addr_t)(ppgtt->pt_pages[act_pt]), 3);
  616.                         act_pte = 0;
  617.  
  618.                 }
  619.         }
  620.     FreeKernelSpace(pt_vaddr);
  621. }
  622.  
  623. static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
  624. {
  625.         struct i915_hw_ppgtt *ppgtt =
  626.                 container_of(vm, struct i915_hw_ppgtt, base);
  627.         int i;
  628.  
  629.         drm_mm_takedown(&ppgtt->base.mm);
  630.  
  631.         if (ppgtt->pt_dma_addr) {
  632.                 for (i = 0; i < ppgtt->num_pd_entries; i++)
  633.                         pci_unmap_page(ppgtt->base.dev->pdev,
  634.                                        ppgtt->pt_dma_addr[i],
  635.                                        4096, PCI_DMA_BIDIRECTIONAL);
  636.         }
  637.  
  638.         kfree(ppgtt->pt_dma_addr);
  639.         for (i = 0; i < ppgtt->num_pd_entries; i++)
  640.                 __free_page(ppgtt->pt_pages[i]);
  641.         kfree(ppgtt->pt_pages);
  642.         kfree(ppgtt);
  643. }
  644.  
  645. static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
  646. {
  647.         struct drm_device *dev = ppgtt->base.dev;
  648.         struct drm_i915_private *dev_priv = dev->dev_private;
  649.         unsigned first_pd_entry_in_global_pt;
  650.         int i;
  651.         int ret = -ENOMEM;
  652.  
  653.         /* ppgtt PDEs reside in the global gtt pagetable, which has 512*1024
  654.          * entries. For aliasing ppgtt support we just steal them at the end for
  655.          * now. */
  656.        first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);
  657.  
  658.         ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
  659.         ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
  660.         ppgtt->enable = gen6_ppgtt_enable;
  661.         ppgtt->base.clear_range = gen6_ppgtt_clear_range;
  662.         ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
  663.         ppgtt->base.cleanup = gen6_ppgtt_cleanup;
  664.         ppgtt->base.scratch = dev_priv->gtt.base.scratch;
  665.         ppgtt->base.start = 0;
  666.         ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
  667.         ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
  668.                                   GFP_KERNEL);
  669.         if (!ppgtt->pt_pages)
  670.                 return -ENOMEM;
  671.  
  672.         for (i = 0; i < ppgtt->num_pd_entries; i++) {
  673.                 ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
  674.                 if (!ppgtt->pt_pages[i])
  675.                         goto err_pt_alloc;
  676.         }
  677.  
  678.         ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
  679.                                              GFP_KERNEL);
  680.     if (!ppgtt->pt_dma_addr)
  681.         goto err_pt_alloc;
  682.  
  683.     for (i = 0; i < ppgtt->num_pd_entries; i++) {
  684.         dma_addr_t pt_addr;
  685.  
  686.                 pt_addr = pci_map_page(dev->pdev, ppgtt->pt_pages[i], 0, 4096,
  687.                                                PCI_DMA_BIDIRECTIONAL);
  688.  
  689.         ppgtt->pt_dma_addr[i] = pt_addr;
  690.     }
  691.  
  692.         ppgtt->base.clear_range(&ppgtt->base, 0,
  693.                                 ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES, true);
  694.  
  695.         ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);
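        /* pd_offset is a byte offset into the global GTT (gen6 PTEs are 4
         * bytes each); gen6_ppgtt_enable() later converts it to cachelines
         * (/ 64) and shifts it into place for RING_PP_DIR_BASE.
         */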
  696.  
  697.         return 0;
  698.  
  699. err_pd_pin:
  700.         if (ppgtt->pt_dma_addr) {
  701.                 for (i--; i >= 0; i--)
  702.                         pci_unmap_page(dev->pdev, ppgtt->pt_dma_addr[i],
  703.                                        4096, PCI_DMA_BIDIRECTIONAL);
  704.         }
  705. err_pt_alloc:
  706.         kfree(ppgtt->pt_dma_addr);
  707.         for (i = 0; i < ppgtt->num_pd_entries; i++) {
  708.                 if (ppgtt->pt_pages[i])
  709.                         __free_page(ppgtt->pt_pages[i]);
  710.         }
  711.         kfree(ppgtt->pt_pages);
  712.  
  713.         return ret;
  714. }
  715.  
  716. static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev)
  717. {
  718.         struct drm_i915_private *dev_priv = dev->dev_private;
  719.         struct i915_hw_ppgtt *ppgtt;
  720.         int ret;
  721.  
  722.         ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
  723.         if (!ppgtt)
  724.                 return -ENOMEM;
  725.  
  726.         ppgtt->base.dev = dev;
  727.  
  728.         if (INTEL_INFO(dev)->gen < 8)
  729.                 ret = gen6_ppgtt_init(ppgtt);
  730.         else if (IS_GEN8(dev))
  731.                 ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
  732.         else
  733.                 BUG();
  734.  
  735.         if (ret)
  736.                 kfree(ppgtt);
  737.         else {
  738.                 dev_priv->mm.aliasing_ppgtt = ppgtt;
  739.                 drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
  740.                             ppgtt->base.total);
  741.         }
  742.  
  743.         return ret;
  744. }
  745.  
  746. void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
  747. {
  748.         struct drm_i915_private *dev_priv = dev->dev_private;
  749.         struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
  750.  
  751.         if (!ppgtt)
  752.                 return;
  753.  
  754.         ppgtt->base.cleanup(&ppgtt->base);
  755.         dev_priv->mm.aliasing_ppgtt = NULL;
  756. }
  757.  
  758. void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
  759.                             struct drm_i915_gem_object *obj,
  760.                             enum i915_cache_level cache_level)
  761. {
  762.         ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
  763.                                    i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
  764.                                      cache_level);
  765. }
  766.  
  767. void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
  768.                               struct drm_i915_gem_object *obj)
  769. {
  770.         ppgtt->base.clear_range(&ppgtt->base,
  771.                                 i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
  772.                                 obj->base.size >> PAGE_SHIFT,
  773.                                 true);
  774. }
  775.  
  776. extern int intel_iommu_gfx_mapped;
  777. /* Certain Gen5 chipsets require idling the GPU before
  778.  * unmapping anything from the GTT when VT-d is enabled.
  779.  */
  780. static inline bool needs_idle_maps(struct drm_device *dev)
  781. {
  782. #ifdef CONFIG_INTEL_IOMMU
  783.         /* Query intel_iommu to see if we need the workaround. Presumably that
  784.          * was loaded first.
  785.          */
  786.         if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
  787.                 return true;
  788. #endif
  789.         return false;
  790. }
  791.  
  792. static bool do_idling(struct drm_i915_private *dev_priv)
  793. {
  794.         bool ret = dev_priv->mm.interruptible;
  795.  
  796.         if (unlikely(dev_priv->gtt.do_idle_maps)) {
  797.                 dev_priv->mm.interruptible = false;
  798.                 if (i915_gpu_idle(dev_priv->dev)) {
  799.                         DRM_ERROR("Couldn't idle GPU\n");
  800.                         /* Wait a bit, in hopes it avoids the hang */
  801.                         udelay(10);
  802.                 }
  803.         }
  804.  
  805.         return ret;
  806. }
  807.  
  808. static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
  809. {
  810.         if (unlikely(dev_priv->gtt.do_idle_maps))
  811.                 dev_priv->mm.interruptible = interruptible;
  812. }
  813.  
  814. void i915_check_and_clear_faults(struct drm_device *dev)
  815. {
  816.         struct drm_i915_private *dev_priv = dev->dev_private;
  817.         struct intel_ring_buffer *ring;
  818.         int i;
  819.  
  820.         if (INTEL_INFO(dev)->gen < 6)
  821.                 return;
  822.  
  823.         for_each_ring(ring, dev_priv, i) {
  824.                 u32 fault_reg;
  825.                 fault_reg = I915_READ(RING_FAULT_REG(ring));
  826.                 if (fault_reg & RING_FAULT_VALID) {
  827.                         DRM_DEBUG_DRIVER("Unexpected fault\n"
  828.                                          "\tAddr: 0x%08lx\n"
  829.                                          "\tAddress space: %s\n"
  830.                                          "\tSource ID: %d\n"
  831.                                          "\tType: %d\n",
  832.                                          fault_reg & PAGE_MASK,
  833.                                          fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
  834.                                          RING_FAULT_SRCID(fault_reg),
  835.                                          RING_FAULT_FAULT_TYPE(fault_reg));
  836.                         I915_WRITE(RING_FAULT_REG(ring),
  837.                                    fault_reg & ~RING_FAULT_VALID);
  838.                 }
  839.         }
  840.         POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
  841. }
  842.  
  843. void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
  844. {
  845.         struct drm_i915_private *dev_priv = dev->dev_private;
  846.  
  847.         /* Don't bother messing with faults pre GEN6 as we have little
  848.          * documentation supporting that it's a good idea.
  849.          */
  850.         if (INTEL_INFO(dev)->gen < 6)
  851.                 return;
  852.  
  853.         i915_check_and_clear_faults(dev);
  854.  
  855.         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
  856.                                        dev_priv->gtt.base.start / PAGE_SIZE,
  857.                                        dev_priv->gtt.base.total / PAGE_SIZE,
  858.                                        false);
  859. }
  860.  
  861. void i915_gem_restore_gtt_mappings(struct drm_device *dev)
  862. {
  863.         struct drm_i915_private *dev_priv = dev->dev_private;
  864.         struct drm_i915_gem_object *obj;
  865.  
  866.         i915_check_and_clear_faults(dev);
  867.  
  868.         /* First fill our portion of the GTT with scratch pages */
  869.         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
  870.                                        dev_priv->gtt.base.start / PAGE_SIZE,
  871.                                        dev_priv->gtt.base.total / PAGE_SIZE,
  872.                                        true);
  873.  
  874.         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
  875.                 i915_gem_clflush_object(obj, obj->pin_display);
  876.                 i915_gem_gtt_bind_object(obj, obj->cache_level);
  877.         }
  878.  
  879.         i915_gem_chipset_flush(dev);
  880. }
  881.  
  882. int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
  883. {
  884.         if (obj->has_dma_mapping)
  885.                 return 0;
  886.  
  887.         if (!dma_map_sg(&obj->base.dev->pdev->dev,
  888.                         obj->pages->sgl, obj->pages->nents,
  889.                         PCI_DMA_BIDIRECTIONAL))
  890.                 return -ENOSPC;
  891.  
  892.         return 0;
  893. }
  894.  
  895. static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
  896. {
  897. #ifdef writeq
  898.         writeq(pte, addr);
  899. #else
  900.         iowrite32((u32)pte, addr);
  901.         iowrite32(pte >> 32, addr + 4);
  902. #endif
  903. }
  904.  
  905. static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
  906.                                      struct sg_table *st,
  907.                                      unsigned int first_entry,
  908.                                      enum i915_cache_level level)
  909. {
  910.         struct drm_i915_private *dev_priv = vm->dev->dev_private;
  911.         gen8_gtt_pte_t __iomem *gtt_entries =
  912.                 (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
  913.         int i = 0;
  914.         struct sg_page_iter sg_iter;
  915.         dma_addr_t addr;
  916.  
  917.         for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
  918.                 addr = sg_dma_address(sg_iter.sg) +
  919.                         (sg_iter.sg_pgoffset << PAGE_SHIFT);
  920.                 gen8_set_pte(&gtt_entries[i],
  921.                              gen8_pte_encode(addr, level, true));
  922.                 i++;
  923.         }
  924.  
  925.         /*
  926.          * XXX: This serves as a posting read to make sure that the PTE has
  927.          * actually been updated. There is some concern that even though
  928.          * registers and PTEs are within the same BAR that they are potentially
  929.          * of NUMA access patterns. Therefore, even with the way we assume
  930.          * hardware should work, we must keep this posting read for paranoia.
  931.          */
  932.         if (i != 0)
  933.                 WARN_ON(readq(&gtt_entries[i-1])
  934.                         != gen8_pte_encode(addr, level, true));
  935.  
  936.         /* This next bit makes the above posting read even more important. We
  937.          * want to flush the TLBs only after we're certain all the PTE updates
  938.          * have finished.
  939.          */
  940.         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  941.         POSTING_READ(GFX_FLSH_CNTL_GEN6);
  942. }
  943.  
  944. /*
  945.  * Binds an object into the global gtt with the specified cache level. The object
  946.  * will be accessible to the GPU via commands whose operands reference offsets
  947.  * within the global GTT as well as accessible by the GPU through the GMADR
  948.  * mapped BAR (dev_priv->mm.gtt->gtt).
  949.  */
  950. static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
  951.                                      struct sg_table *st,
  952.                                      unsigned int first_entry,
  953.                                   enum i915_cache_level level)
  954. {
  955.         struct drm_i915_private *dev_priv = vm->dev->dev_private;
  956.         gen6_gtt_pte_t __iomem *gtt_entries =
  957.                 (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
  958.         int i = 0;
  959.         struct sg_page_iter sg_iter;
  960.         dma_addr_t addr;
  961.  
  962.         for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
  963.                 addr = sg_page_iter_dma_address(&sg_iter);
  964.                 iowrite32(vm->pte_encode(addr, level, true), &gtt_entries[i]);
  965.                 i++;
  966.         }
  967.  
  968.         /* XXX: This serves as a posting read to make sure that the PTE has
  969.          * actually been updated. There is some concern that even though
  970.          * registers and PTEs are within the same BAR that they are potentially
  971.          * of NUMA access patterns. Therefore, even with the way we assume
  972.          * hardware should work, we must keep this posting read for paranoia.
  973.          */
  974.         if (i != 0)
  975.                 WARN_ON(readl(&gtt_entries[i-1]) !=
  976.                         vm->pte_encode(addr, level, true));
  977.  
  978.         /* This next bit makes the above posting read even more important. We
  979.          * want to flush the TLBs only after we're certain all the PTE updates
  980.          * have finished.
  981.          */
  982.         I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
  983.         POSTING_READ(GFX_FLSH_CNTL_GEN6);
  984. }
  985.  
  986. static void gen8_ggtt_clear_range(struct i915_address_space *vm,
  987.                                   unsigned int first_entry,
  988.                                   unsigned int num_entries,
  989.                                   bool use_scratch)
  990. {
  991.         struct drm_i915_private *dev_priv = vm->dev->dev_private;
  992.         gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
  993.                 (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
  994.         const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
  995.         int i;
  996.  
  997.         if (WARN(num_entries > max_entries,
  998.                  "First entry = %d; Num entries = %d (max=%d)\n",
  999.                  first_entry, num_entries, max_entries))
  1000.                 num_entries = max_entries;
  1001.  
  1002.         scratch_pte = gen8_pte_encode(vm->scratch.addr,
  1003.                                       I915_CACHE_LLC,
  1004.                                       use_scratch);
  1005.         for (i = 0; i < num_entries; i++)
  1006.                 gen8_set_pte(&gtt_base[i], scratch_pte);
  1007.         readl(gtt_base);
  1008. }
  1009.  
  1010. static void gen6_ggtt_clear_range(struct i915_address_space *vm,
  1011.                                   unsigned int first_entry,
  1012.                                   unsigned int num_entries,
  1013.                                   bool use_scratch)
  1014. {
  1015.         struct drm_i915_private *dev_priv = vm->dev->dev_private;
  1016.         gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
  1017.                 (gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
  1018.         const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
  1019.         int i;
  1020.  
  1021.         if (WARN(num_entries > max_entries,
  1022.                  "First entry = %d; Num entries = %d (max=%d)\n",
  1023.                  first_entry, num_entries, max_entries))
  1024.                 num_entries = max_entries;
  1025.  
  1026.         scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC, use_scratch);
  1027.  
  1028.         for (i = 0; i < num_entries; i++)
  1029.                 iowrite32(scratch_pte, &gtt_base[i]);
  1030.         readl(gtt_base);
  1031. }
  1032.  
  1033. static void i915_ggtt_insert_entries(struct i915_address_space *vm,
  1034.                                      struct sg_table *st,
  1035.                                      unsigned int pg_start,
  1036.                                      enum i915_cache_level cache_level)
  1037. {
  1038.         unsigned int flags = (cache_level == I915_CACHE_NONE) ?
  1039.                 AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
  1040.  
  1041.         intel_gtt_insert_sg_entries(st, pg_start, flags);
  1042.  
  1043. }
  1044.  
  1045. static void i915_ggtt_clear_range(struct i915_address_space *vm,
  1046.                                   unsigned int first_entry,
  1047.                                   unsigned int num_entries,
  1048.                                   bool unused)
  1049. {
  1050.         intel_gtt_clear_range(first_entry, num_entries);
  1051. }
  1052.  
  1053.  
  1054. void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
  1055.                                 enum i915_cache_level cache_level)
  1056. {
  1057.         struct drm_device *dev = obj->base.dev;
  1058.         struct drm_i915_private *dev_priv = dev->dev_private;
  1059.         const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
  1060.  
  1061.         dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
  1062.                                           entry,
  1063.                                          cache_level);
  1064.  
  1065.         obj->has_global_gtt_mapping = 1;
  1066. }
  1067.  
  1068. void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
  1069. {
  1070.         struct drm_device *dev = obj->base.dev;
  1071.         struct drm_i915_private *dev_priv = dev->dev_private;
  1072.         const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
  1073.  
  1074.         dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
  1075.                                        entry,
  1076.                                        obj->base.size >> PAGE_SHIFT,
  1077.                                        true);
  1078.  
  1079.         obj->has_global_gtt_mapping = 0;
  1080. }
  1081.  
  1082. void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
  1083. {
  1084.         struct drm_device *dev = obj->base.dev;
  1085.         struct drm_i915_private *dev_priv = dev->dev_private;
  1086.         bool interruptible;
  1087.  
  1088.         interruptible = do_idling(dev_priv);
  1089.  
  1090.         if (!obj->has_dma_mapping)
  1091.                 dma_unmap_sg(&dev->pdev->dev,
  1092.                              obj->pages->sgl, obj->pages->nents,
  1093.                              PCI_DMA_BIDIRECTIONAL);
  1094.  
  1095.         undo_idling(dev_priv, interruptible);
  1096. }
  1097.  
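/* Registered as the drm_mm color_adjust hook on !HAS_LLC platforms (see
 * i915_gem_setup_global_gtt() below): when a node's "color" differs from its
 * neighbour's, the usable range is shrunk by one page on that side, so
 * differently-colored allocations are kept at least a page apart.
 */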
  1098. static void i915_gtt_color_adjust(struct drm_mm_node *node,
  1099.                                   unsigned long color,
  1100.                                   unsigned long *start,
  1101.                                   unsigned long *end)
  1102. {
  1103.         if (node->color != color)
  1104.                 *start += 4096;
  1105.  
  1106.         if (!list_empty(&node->node_list)) {
  1107.                 node = list_entry(node->node_list.next,
  1108.                                   struct drm_mm_node,
  1109.                                   node_list);
  1110.                 if (node->allocated && node->color != color)
  1111.                         *end -= 4096;
  1112.         }
  1113. }
  1114.  
  1115. void i915_gem_setup_global_gtt(struct drm_device *dev,
  1116.                               unsigned long start,
  1117.                               unsigned long mappable_end,
  1118.                               unsigned long end)
  1119. {
  1120.         /* Let GEM Manage all of the aperture.
  1121.          *
  1122.          * However, leave one page at the end still bound to the scratch page.
  1123.          * There are a number of places where the hardware apparently prefetches
  1124.          * past the end of the object, and we've seen multiple hangs with the
  1125.          * GPU head pointer stuck in a batchbuffer bound at the last page of the
  1126.          * aperture.  One page should be enough to keep any prefetching inside
  1127.          * of the aperture.
  1128.          */
  1129.         struct drm_i915_private *dev_priv = dev->dev_private;
  1130.         struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
  1131.         struct drm_mm_node *entry;
  1132.         struct drm_i915_gem_object *obj;
  1133.         unsigned long hole_start, hole_end;
  1134.  
  1135.         BUG_ON(mappable_end > end);
  1136.  
  1137.         /* Subtract the guard page ... */
  1138.         drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
  1139.         if (!HAS_LLC(dev))
  1140.                 dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
  1141.  
  1142.         /* Mark any preallocated objects as occupied */
  1143.         list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
  1144.                 struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
  1145.                 int ret;
  1146.                 DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
  1147.                               i915_gem_obj_ggtt_offset(obj), obj->base.size);
  1148.  
  1149.                 WARN_ON(i915_gem_obj_ggtt_bound(obj));
  1150.                 ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
  1151.                 if (ret)
  1152.                         DRM_DEBUG_KMS("Reservation failed\n");
  1153.                 obj->has_global_gtt_mapping = 1;
  1154.         }
  1155.  
  1156.         dev_priv->gtt.base.start = start;
  1157.         dev_priv->gtt.base.total = end - start;
  1158.  
  1159.         /* Clear any non-preallocated blocks */
  1160.         drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
  1161.                 const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
  1162.                 DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
  1163.                               hole_start, hole_end);
  1164.                 ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count, true);
  1165.         }
  1166.  
  1167.         /* And finally clear the reserved guard page */
  1168.         ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1, true);
  1169. }
  1170.  
  1171. static bool
  1172. intel_enable_ppgtt(struct drm_device *dev)
  1173. {
  1174.         if (i915_enable_ppgtt >= 0)
  1175.                 return i915_enable_ppgtt;
  1176.  
  1177. #ifdef CONFIG_INTEL_IOMMU
  1178.         /* Disable ppgtt on SNB if VT-d is on. */
  1179.         if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped)
  1180.                 return false;
  1181. #endif
  1182.  
  1183.         return true;
  1184. }
  1185.  
  1186. void i915_gem_init_global_gtt(struct drm_device *dev)
  1187. {
  1188.         struct drm_i915_private *dev_priv = dev->dev_private;
  1189.         unsigned long gtt_size, mappable_size;
  1190.  
  1191.         gtt_size = dev_priv->gtt.base.total;
  1192.         mappable_size = dev_priv->gtt.mappable_end;
  1193.  
  1194.         if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
  1195.                 int ret;
  1196.  
  1197.                 if (INTEL_INFO(dev)->gen <= 7) {
  1198.                 /* PPGTT pdes are stolen from global gtt ptes, so shrink the
  1199.                  * aperture accordingly when using aliasing ppgtt. */
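                        /* 512 PDEs * 4 KiB pages = 2 MiB carved off the end
                         * of the GGTT for the aliasing PPGTT's directory.
                         */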
  1200.                         gtt_size -= GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
  1201.                 }
  1202.  
  1203.                 i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
  1204.  
  1205.                 ret = i915_gem_init_aliasing_ppgtt(dev);
  1206.                 if (!ret)
  1207.                         return;
  1208.  
  1209.                 DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
  1210.                 drm_mm_takedown(&dev_priv->gtt.base.mm);
  1211.                 if (INTEL_INFO(dev)->gen < 8)
  1212.                         gtt_size += GEN6_PPGTT_PD_ENTRIES*PAGE_SIZE;
  1213.         }
  1214.         i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
  1215. }
  1216.  
  1217. static int setup_scratch_page(struct drm_device *dev)
  1218. {
  1219.         struct drm_i915_private *dev_priv = dev->dev_private;
  1220.         struct page *page;
  1221.         dma_addr_t dma_addr;
  1222.  
  1223.         page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO);
  1224.         if (page == NULL)
  1225.                 return -ENOMEM;
  1226.     get_page(page);
  1227.         set_pages_uc(page, 1);
  1228.  
  1229. #ifdef CONFIG_INTEL_IOMMU
  1230.         dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE,
  1231.                                 PCI_DMA_BIDIRECTIONAL);
  1232.         if (pci_dma_mapping_error(dev->pdev, dma_addr))
  1233.                 return -EINVAL;
  1234. #else
  1235.         dma_addr = page_to_phys(page);
  1236. #endif
  1237.         dev_priv->gtt.base.scratch.page = page;
  1238.         dev_priv->gtt.base.scratch.addr = dma_addr;
  1239.  
  1240.         return 0;
  1241. }
  1242.  
  1243. static void teardown_scratch_page(struct drm_device *dev)
  1244. {
  1245.         struct drm_i915_private *dev_priv = dev->dev_private;
  1246.         struct page *page = dev_priv->gtt.base.scratch.page;
  1247.  
  1248.         set_pages_wb(page, 1);
  1249.         pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
  1250.                        PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
  1251.         put_page(page);
  1252.         __free_page(page);
  1253. }
  1254.  
  1255. static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
  1256. {
  1257.         snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
  1258.         snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
  1259.         return snb_gmch_ctl << 20;
  1260. }
  1261.  
  1262. static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
  1263. {
  1264.         bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
  1265.         bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
  1266.         if (bdw_gmch_ctl)
  1267.                 bdw_gmch_ctl = 1 << bdw_gmch_ctl;
  1268.         if (bdw_gmch_ctl > 4) {
  1269.                 WARN_ON(!i915_preliminary_hw_support);
  1270.                 return 4<<20;
  1271.         }
  1272.  
  1273.         return bdw_gmch_ctl << 20;
  1274. }
  1275.  
  1276. static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
  1277. {
  1278.         snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
  1279.         snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
  1280.         return snb_gmch_ctl << 25; /* 32 MB units */
  1281. }
  1282.  
  1283. static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
  1284. {
  1285.         bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
  1286.         bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
  1287.         return bdw_gmch_ctl << 25; /* 32 MB units */
  1288. }
  1289.  
  1290. static int ggtt_probe_common(struct drm_device *dev,
  1291.                              size_t gtt_size)
  1292. {
  1293.         struct drm_i915_private *dev_priv = dev->dev_private;
  1294.         phys_addr_t gtt_phys_addr;
  1295.         int ret;
  1296.  
  1297.         /* For Modern GENs the PTEs and register space are split in the BAR */
  1298.         gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
  1299.                 (pci_resource_len(dev->pdev, 0) / 2);
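        /* i.e. BAR 0 is assumed to hold MMIO registers in its lower half and
         * the GTT entries (the "GSM") in its upper half, which is why the
         * mapping below starts at the midpoint of the BAR.
         */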
  1300.  
  1301.         dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
  1302.         if (!dev_priv->gtt.gsm) {
  1303.                 DRM_ERROR("Failed to map the gtt page table\n");
  1304.                 return -ENOMEM;
  1305.         }
  1306.  
  1307.         ret = setup_scratch_page(dev);
  1308.         if (ret) {
  1309.                 DRM_ERROR("Scratch setup failed\n");
  1310.                 /* iounmap will also get called at remove, but meh */
  1311.                 iounmap(dev_priv->gtt.gsm);
  1312.         }
  1313.  
  1314.         return ret;
  1315. }
  1316.  
  1317. /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
  1318.  * bits. When using advanced contexts each context stores its own PAT, but
  1319.  * writing this data shouldn't be harmful even in those cases. */
  1320. static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
  1321. {
  1322. #define GEN8_PPAT_UC            (0<<0)
  1323. #define GEN8_PPAT_WC            (1<<0)
  1324. #define GEN8_PPAT_WT            (2<<0)
  1325. #define GEN8_PPAT_WB            (3<<0)
  1326. #define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
  1327. /* FIXME(BDW): Bspec is completely confused about cache control bits. */
  1328. #define GEN8_PPAT_LLC           (1<<2)
  1329. #define GEN8_PPAT_LLCELLC       (2<<2)
  1330. #define GEN8_PPAT_LLCeLLC       (3<<2)
  1331. #define GEN8_PPAT_AGE(x)        (x<<4)
  1332. #define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
  1333.         uint64_t pat;
  1334.  
  1335.         pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
  1336.               GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
  1337.               GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
  1338.               GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
  1339.               GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
  1340.               GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
  1341.               GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
  1342.               GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
  1343.  
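                /* Each gen8 PTE selects one of these eight entries through
                 * its PAT/PCD/PWT bits, so e.g. entry 3 (uncached) serves
                 * scanout buffers while entry 0 stays write-back for ordinary
                 * objects. */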
  1344.         /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
  1345.          * write would work. */
  1346.         I915_WRITE(GEN8_PRIVATE_PAT, pat);
  1347.         I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
  1348. }
  1349.  
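        /*
         * Note that the mappable aperture (BAR 2) and the total GTT are
         * different things: mappable_base/mappable_end describe the
         * CPU-visible window, while gtt_total is the full GGTT address
         * space, which can be considerably larger.
         */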
  1350. static int gen8_gmch_probe(struct drm_device *dev,
  1351.                            size_t *gtt_total,
  1352.                            size_t *stolen,
  1353.                            phys_addr_t *mappable_base,
  1354.                            unsigned long *mappable_end)
  1355. {
  1356.         struct drm_i915_private *dev_priv = dev->dev_private;
  1357.         unsigned int gtt_size;
  1358.         u16 snb_gmch_ctl;
  1359.         int ret;
  1360.  
  1361.         /* TODO: We're not aware of mappable constraints on gen8 yet */
  1362.         *mappable_base = pci_resource_start(dev->pdev, 2);
  1363.         *mappable_end = pci_resource_len(dev->pdev, 2);
  1364.  
  1365.         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
  1366.                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
  1367.  
  1368.         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
  1369.  
  1370.         *stolen = gen8_get_stolen_size(snb_gmch_ctl);
  1371.  
  1372.         gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
  1373.         *gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
  1374.  
  1375.         gen8_setup_private_ppat(dev_priv);
  1376.  
  1377.         ret = ggtt_probe_common(dev, gtt_size);
  1378.  
  1379.         dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
  1380.         dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
  1381.  
  1382.         return ret;
  1383. }
  1384.  
  1385. static int gen6_gmch_probe(struct drm_device *dev,
  1386.                            size_t *gtt_total,
  1387.                            size_t *stolen,
  1388.                            phys_addr_t *mappable_base,
  1389.                            unsigned long *mappable_end)
  1390. {
  1391.         struct drm_i915_private *dev_priv = dev->dev_private;
  1392.         unsigned int gtt_size;
  1393.         u16 snb_gmch_ctl;
  1394.         int ret;
  1395.  
  1396.         *mappable_base = pci_resource_start(dev->pdev, 2);
  1397.         *mappable_end = pci_resource_len(dev->pdev, 2);
  1398.  
  1399.         /* 64/512MB is the current min/max we actually know of, but this is just
  1400.          * a coarse sanity check.
  1401.          */
  1402.         if (*mappable_end < (64 << 20) || *mappable_end > (512 << 20)) {
  1403.                 DRM_ERROR("Unknown GMADR size (%lx)\n",
  1404.                           dev_priv->gtt.mappable_end);
  1405.                 return -ENXIO;
  1406.         }
  1407.  
  1408.         if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
  1409.                 pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
  1410.         pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
  1411.  
  1412.         *stolen = gen6_get_stolen_size(snb_gmch_ctl);
  1413.  
  1414.         gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
  1415.         *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
  1416.  
  1417.         ret = ggtt_probe_common(dev, gtt_size);
  1418.  
  1419.         dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
  1420.         dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
  1421.  
  1422.         return ret;
  1423. }
  1424.  
  1425. static void gen6_gmch_remove(struct i915_address_space *vm)
  1426. {
  1427.         struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
  1428. 
  1429.         iounmap(gtt->gsm);
  1430.         teardown_scratch_page(vm->dev);
  1431. }
  1432.  
  1433. static int i915_gmch_probe(struct drm_device *dev,
  1434.                            size_t *gtt_total,
  1435.                            size_t *stolen,
  1436.                            phys_addr_t *mappable_base,
  1437.                            unsigned long *mappable_end)
  1438. {
  1439.         struct drm_i915_private *dev_priv = dev->dev_private;
  1440.         int ret;
  1441.  
  1442.         ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
  1443.         if (!ret) {
  1444.                 DRM_ERROR("failed to set up gmch\n");
  1445.                 return -EIO;
  1446.         }
  1447.  
  1448.         intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
  1449.  
  1450.         dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
  1451.         dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
  1452.         dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
  1453.  
  1454.         if (unlikely(dev_priv->gtt.do_idle_maps))
  1455.                 DRM_INFO("applying Ironlake quirks for intel_iommu\n");
  1456.  
  1457.         return 0;
  1458. }
  1459.  
  1460. static void i915_gmch_remove(struct i915_address_space *vm)
  1461. {
  1462. //      intel_gmch_remove();    /* not available in this port; GMCH teardown is a no-op here */
  1463. }
  1464.  
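        /*
         * Pick the GTT backend for this generation: gen2-5 go through the
         * legacy intel-gtt/GMCH code, gen6/7 use the native probe with a
         * per-platform PTE encoder, and gen8 (Broadwell) uses 64-bit PTEs
         * plus the private PPAT programmed in gen8_gmch_probe().
         */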
  1465. int i915_gem_gtt_init(struct drm_device *dev)
  1466. {
  1467.         struct drm_i915_private *dev_priv = dev->dev_private;
  1468.         struct i915_gtt *gtt = &dev_priv->gtt;
  1469.         int ret;
  1470.  
  1471.         if (INTEL_INFO(dev)->gen <= 5) {
  1472.                 gtt->gtt_probe = i915_gmch_probe;
  1473.                 gtt->base.cleanup = i915_gmch_remove;
  1474.         } else if (INTEL_INFO(dev)->gen < 8) {
  1475.                 gtt->gtt_probe = gen6_gmch_probe;
  1476.                 gtt->base.cleanup = gen6_gmch_remove;
  1477.                 if (IS_HASWELL(dev) && dev_priv->ellc_size)
  1478.                         gtt->base.pte_encode = iris_pte_encode;
  1479.                 else if (IS_HASWELL(dev))
  1480.                         gtt->base.pte_encode = hsw_pte_encode;
  1481.                 else if (IS_VALLEYVIEW(dev))
  1482.                         gtt->base.pte_encode = byt_pte_encode;
  1483.                 else if (INTEL_INFO(dev)->gen >= 7)
  1484.                         gtt->base.pte_encode = ivb_pte_encode;
  1485.                 else
  1486.                         gtt->base.pte_encode = snb_pte_encode;
  1487.         } else {
  1488.                 dev_priv->gtt.gtt_probe = gen8_gmch_probe;
  1489.                 dev_priv->gtt.base.cleanup = gen6_gmch_remove;
  1490.         }
  1491.  
  1492.         ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
  1493.                              &gtt->mappable_base, &gtt->mappable_end);
  1494.         if (ret)
  1495.                 return ret;
  1496.  
  1497.         gtt->base.dev = dev;
  1498.  
  1499.         /* GMADR is the PCI mmio aperture into the global GTT. */
  1500.         DRM_INFO("Memory usable by graphics device = %zdM\n",
  1501.                  gtt->base.total >> 20);
  1502.         DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
  1503.         DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
  1504.  
  1505.         return 0;
  1506. }
  1507.  
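        /*
         * The helpers below are local copies of the kernel's scatterlist
         * routines (lib/scatterlist.c), kept in this file seemingly because
         * the port does not build that library; the GTT code above relies on
         * sg tables and the sg page iterator.
         */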
  1508. struct scatterlist *sg_next(struct scatterlist *sg)
  1509. {
  1510.     if (sg_is_last(sg))
  1511.         return NULL;
  1512.  
  1513.     sg++;
  1514.     if (unlikely(sg_is_chain(sg)))
  1515.             sg = sg_chain_ptr(sg);
  1516.  
  1517.     return sg;
  1518. }
  1519.  
  1520.  
  1521. void __sg_free_table(struct sg_table *table, unsigned int max_ents,
  1522.                      sg_free_fn *free_fn)
  1523. {
  1524.     struct scatterlist *sgl, *next;
  1525.  
  1526.     if (unlikely(!table->sgl))
  1527.             return;
  1528.  
  1529.     sgl = table->sgl;
  1530.     while (table->orig_nents) {
  1531.         unsigned int alloc_size = table->orig_nents;
  1532.         unsigned int sg_size;
  1533.  
  1534.         /*
  1535.          * If we have more than max_ents segments left,
  1536.          * then assign 'next' to the sg table after the current one.
  1537.          * sg_size is then one less than alloc size, since the last
  1538.          * element is the chain pointer.
  1539.          */
  1540.         if (alloc_size > max_ents) {
  1541.                 next = sg_chain_ptr(&sgl[max_ents - 1]);
  1542.                 alloc_size = max_ents;
  1543.                 sg_size = alloc_size - 1;
  1544.         } else {
  1545.                 sg_size = alloc_size;
  1546.                 next = NULL;
  1547.         }
  1548.  
  1549.         table->orig_nents -= sg_size;
  1550.         kfree(sgl);
  1551.         sgl = next;
  1552.     }
  1553.  
  1554.     table->sgl = NULL;
  1555. }
  1556.  
  1557. void sg_free_table(struct sg_table *table)
  1558. {
  1559.     __sg_free_table(table, SG_MAX_SINGLE_ALLOC, NULL);
  1560. }
  1561.  
  1562. int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
  1563. {
  1564.     struct scatterlist *sg, *prv;
  1565.     unsigned int left;
  1566.     unsigned int max_ents = SG_MAX_SINGLE_ALLOC;
  1567.  
  1568. #ifndef ARCH_HAS_SG_CHAIN
  1569.     BUG_ON(nents > max_ents);
  1570. #endif
  1571.  
  1572.     memset(table, 0, sizeof(*table));
  1573.  
  1574.     left = nents;
  1575.     prv = NULL;
  1576.     do {
  1577.         unsigned int sg_size, alloc_size = left;
  1578.  
  1579.         if (alloc_size > max_ents) {
  1580.                 alloc_size = max_ents;
  1581.                 sg_size = alloc_size - 1;
  1582.         } else
  1583.                 sg_size = alloc_size;
  1584.  
  1585.         left -= sg_size;
  1586.  
  1587.         sg = kmalloc(alloc_size * sizeof(struct scatterlist), gfp_mask);
  1588.         if (unlikely(!sg)) {
  1589.                 /*
  1590.                  * Adjust entry count to reflect that the last
  1591.                  * entry of the previous table won't be used for
  1592.                  * linkage.  Without this, __sg_free_table() may get
  1593.                  * confused.
  1594.                  */
  1595.                 if (prv)
  1596.                         table->nents = ++table->orig_nents;
  1597.  
  1598.                 goto err;
  1599.         }
  1600.  
  1601.         sg_init_table(sg, alloc_size);
  1602.         table->nents = table->orig_nents += sg_size;
  1603.  
  1604.         /*
  1605.          * If this is the first mapping, assign the sg table header.
  1606.          * If this is not the first mapping, chain previous part.
  1607.          */
  1608.         if (prv)
  1609.                 sg_chain(prv, max_ents, sg);
  1610.         else
  1611.                 table->sgl = sg;
  1612.  
  1613.         /*
  1614.          * If no more entries after this one, mark the end
  1615.          */
  1616.         if (!left)
  1617.                 sg_mark_end(&sg[sg_size - 1]);
  1618.  
  1619.         prv = sg;
  1620.     } while (left);
  1621.  
  1622.     return 0;
  1623.  
  1624. err:
  1625.     __sg_free_table(table, SG_MAX_SINGLE_ALLOC, NULL);
  1626.  
  1627.     return -ENOMEM;
  1628. }
  1629.  
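        /*
         * Illustrative sketch only (guarded by #if 0 so it is never built):
         * the usual allocate/fill/free pattern for an sg_table using the
         * helpers above. The page array and count are hypothetical.
         */
        #if 0
        static int example_build_sg_table(struct page **pages, unsigned int npages)
        {
            struct sg_table st;
            struct scatterlist *sg;
            unsigned int i;
            int ret;

            /* one scatterlist entry per page; chaining is handled internally */
            ret = sg_alloc_table(&st, npages, GFP_KERNEL);
            if (ret)
                    return ret;

            for_each_sg(st.sgl, sg, npages, i)
                    sg_set_page(sg, pages[i], PAGE_SIZE, 0);

            /* ... DMA-map or otherwise consume st.sgl here ... */

            sg_free_table(&st);
            return 0;
        }
        #endif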
  1630.  
  1631. void sg_init_table(struct scatterlist *sgl, unsigned int nents)
  1632. {
  1633.     memset(sgl, 0, sizeof(*sgl) * nents);
  1634. #ifdef CONFIG_DEBUG_SG
  1635.     {
  1636.             unsigned int i;
  1637.             for (i = 0; i < nents; i++)
  1638.                     sgl[i].sg_magic = SG_MAGIC;
  1639.     }
  1640. #endif
  1641.     sg_mark_end(&sgl[nents - 1]);
  1642. }
  1643.  
  1644.  
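        /*
         * Page iterator: __sg_page_iter_start() primes the state and
         * __sg_page_iter_next() advances one page at a time, hopping to the
         * next sg entry once the current one is exhausted (__pg_advance
         * starts at 0 so the first call lands on the first page). A
         * hypothetical walk, again guarded so it is never built:
         */
        #if 0
        static void example_walk_pages(struct sg_table *st)
        {
            struct sg_page_iter piter;

            for_each_sg_page(st->sgl, &piter, st->nents, 0)
                    DRM_DEBUG_DRIVER("page %p\n", sg_page_iter_page(&piter));
        }
        #endif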
  1645. void __sg_page_iter_start(struct sg_page_iter *piter,
  1646.               struct scatterlist *sglist, unsigned int nents,
  1647.               unsigned long pgoffset)
  1648. {
  1649.     piter->__pg_advance = 0;
  1650.     piter->__nents = nents;
  1651.  
  1652.     piter->sg = sglist;
  1653.     piter->sg_pgoffset = pgoffset;
  1654. }
  1655.  
  1656. static int sg_page_count(struct scatterlist *sg)
  1657. {
  1658.     return PAGE_ALIGN(sg->offset + sg->length) >> PAGE_SHIFT;
  1659. }
  1660.  
  1661. bool __sg_page_iter_next(struct sg_page_iter *piter)
  1662. {
  1663.     if (!piter->__nents || !piter->sg)
  1664.         return false;
  1665.  
  1666.     piter->sg_pgoffset += piter->__pg_advance;
  1667.     piter->__pg_advance = 1;
  1668.  
  1669.     while (piter->sg_pgoffset >= sg_page_count(piter->sg)) {
  1670.         piter->sg_pgoffset -= sg_page_count(piter->sg);
  1671.         piter->sg = sg_next(piter->sg);
  1672.         if (!--piter->__nents || !piter->sg)
  1673.             return false;
  1674.     }
  1675.  
  1676.     return true;
  1677. }
  1678. EXPORT_SYMBOL(__sg_page_iter_next);
  1679.  
  1680.  
  1681.