1/*
2 * GTT virtualization
3 *
4 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Zhi Wang <zhi.a.wang@intel.com>
27 * Zhenyu Wang <zhenyuw@linux.intel.com>
28 * Xiao Zheng <xiao.zheng@intel.com>
29 *
30 * Contributors:
31 * Min He <min.he@intel.com>
32 * Bing Niu <bing.niu@intel.com>
33 *
34 */
35
36#include <drm/drm_print.h>
37
38#include "i915_drv.h"
39#include "gvt.h"
40#include "i915_pvinfo.h"
41#include "trace.h"
42
43#include "gt/intel_gt_regs.h"
44#include <linux/vmalloc.h>
45
46#if defined(VERBOSE_DEBUG)
47#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
48#else
49#define gvt_vdbg_mm(fmt, args...)
50#endif
51
52static bool enable_out_of_sync = false;
53static int preallocated_oos_pages = 8192;
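/*
 * enable_out_of_sync gates the "out of sync" optimization implemented
 * below: write protection on hot PTE pages is dropped and their contents
 * are re-synced before the next workload submission. preallocated_oos_pages
 * bounds how many such tracking pages are set aside when the GTT subsystem
 * initializes with out-of-sync enabled.
 */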
54
/*
 * Validate that a guest graphics memory address and its range size fall
 * entirely within the vGPU's aperture or hidden GM region.
 */
59bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
60{
61 if (size == 0)
62 return vgpu_gmadr_is_valid(vgpu, addr);
63
64 if (vgpu_gmadr_is_aperture(vgpu, addr) &&
65 vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
66 return true;
67 else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
68 vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
69 return true;
70
71 gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
72 addr, size);
73 return false;
74}
75
76#define gtt_type_is_entry(type) \
77 (type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
78 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
79 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)
80
81#define gtt_type_is_pt(type) \
82 (type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)
83
84#define gtt_type_is_pte_pt(type) \
85 (type == GTT_TYPE_PPGTT_PTE_PT)
86
87#define gtt_type_is_root_pointer(type) \
88 (gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)
89
90#define gtt_init_entry(e, t, p, v) do { \
91 (e)->type = t; \
92 (e)->pdev = p; \
93 memcpy(&(e)->val64, &v, sizeof(v)); \
94} while (0)
95
/*
 * Mappings between GTT_TYPE* enumerations.
 * For a given type, the table below provides:
 * - the type of the next level page table
 * - the type of an entry inside this level of page table
 * - the entry type when the PSE bit is set
 *
 * If a type has no such information (an L4 root entry has no PSE
 * variant, and a PTE page table has no next level page table),
 * GTT_TYPE_INVALID is returned. This makes the table convenient to use
 * when traversing a page table.
 */
111
112struct gtt_type_table_entry {
113 int entry_type;
114 int pt_type;
115 int next_pt_type;
116 int pse_entry_type;
117};
118
119#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
120 [type] = { \
121 .entry_type = e_type, \
122 .pt_type = cpt_type, \
123 .next_pt_type = npt_type, \
124 .pse_entry_type = pse_type, \
125 }
126
127static const struct gtt_type_table_entry gtt_type_table[] = {
128 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
129 GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
130 GTT_TYPE_INVALID,
131 GTT_TYPE_PPGTT_PML4_PT,
132 GTT_TYPE_INVALID),
133 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
134 GTT_TYPE_PPGTT_PML4_ENTRY,
135 GTT_TYPE_PPGTT_PML4_PT,
136 GTT_TYPE_PPGTT_PDP_PT,
137 GTT_TYPE_INVALID),
138 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
139 GTT_TYPE_PPGTT_PML4_ENTRY,
140 GTT_TYPE_PPGTT_PML4_PT,
141 GTT_TYPE_PPGTT_PDP_PT,
142 GTT_TYPE_INVALID),
143 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
144 GTT_TYPE_PPGTT_PDP_ENTRY,
145 GTT_TYPE_PPGTT_PDP_PT,
146 GTT_TYPE_PPGTT_PDE_PT,
147 GTT_TYPE_PPGTT_PTE_1G_ENTRY),
148 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
149 GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
150 GTT_TYPE_INVALID,
151 GTT_TYPE_PPGTT_PDE_PT,
152 GTT_TYPE_PPGTT_PTE_1G_ENTRY),
153 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
154 GTT_TYPE_PPGTT_PDP_ENTRY,
155 GTT_TYPE_PPGTT_PDP_PT,
156 GTT_TYPE_PPGTT_PDE_PT,
157 GTT_TYPE_PPGTT_PTE_1G_ENTRY),
158 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
159 GTT_TYPE_PPGTT_PDE_ENTRY,
160 GTT_TYPE_PPGTT_PDE_PT,
161 GTT_TYPE_PPGTT_PTE_PT,
162 GTT_TYPE_PPGTT_PTE_2M_ENTRY),
163 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
164 GTT_TYPE_PPGTT_PDE_ENTRY,
165 GTT_TYPE_PPGTT_PDE_PT,
166 GTT_TYPE_PPGTT_PTE_PT,
167 GTT_TYPE_PPGTT_PTE_2M_ENTRY),
168 /* We take IPS bit as 'PSE' for PTE level. */
169 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
170 GTT_TYPE_PPGTT_PTE_4K_ENTRY,
171 GTT_TYPE_PPGTT_PTE_PT,
172 GTT_TYPE_INVALID,
173 GTT_TYPE_PPGTT_PTE_64K_ENTRY),
174 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
175 GTT_TYPE_PPGTT_PTE_4K_ENTRY,
176 GTT_TYPE_PPGTT_PTE_PT,
177 GTT_TYPE_INVALID,
178 GTT_TYPE_PPGTT_PTE_64K_ENTRY),
179 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
180 GTT_TYPE_PPGTT_PTE_4K_ENTRY,
181 GTT_TYPE_PPGTT_PTE_PT,
182 GTT_TYPE_INVALID,
183 GTT_TYPE_PPGTT_PTE_64K_ENTRY),
184 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
185 GTT_TYPE_PPGTT_PDE_ENTRY,
186 GTT_TYPE_PPGTT_PDE_PT,
187 GTT_TYPE_INVALID,
188 GTT_TYPE_PPGTT_PTE_2M_ENTRY),
189 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
190 GTT_TYPE_PPGTT_PDP_ENTRY,
191 GTT_TYPE_PPGTT_PDP_PT,
192 GTT_TYPE_INVALID,
193 GTT_TYPE_PPGTT_PTE_1G_ENTRY),
194 GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
195 GTT_TYPE_GGTT_PTE,
196 GTT_TYPE_INVALID,
197 GTT_TYPE_INVALID,
198 GTT_TYPE_INVALID),
199};
200
201static inline int get_next_pt_type(int type)
202{
203 return gtt_type_table[type].next_pt_type;
204}
205
206static inline int get_entry_type(int type)
207{
208 return gtt_type_table[type].entry_type;
209}
210
211static inline int get_pse_type(int type)
212{
213 return gtt_type_table[type].pse_entry_type;
214}
215
216static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index)
217{
218 void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
219
220 return readq(addr);
221}
222
223static void ggtt_invalidate(struct intel_gt *gt)
224{
225 intel_wakeref_t wakeref;
226
227 wakeref = mmio_hw_access_pre(gt);
	intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
229 mmio_hw_access_post(gt, wakeref);
230}
231
232static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte)
233{
234 void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
235
	writeq(pte, addr);
237}
238
239static inline int gtt_get_entry64(void *pt,
240 struct intel_gvt_gtt_entry *e,
241 unsigned long index, bool hypervisor_access, unsigned long gpa,
242 struct intel_vgpu *vgpu)
243{
244 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
245 int ret;
246
247 if (WARN_ON(info->gtt_entry_size != 8))
248 return -EINVAL;
249
250 if (hypervisor_access) {
		ret = intel_gvt_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
254 if (WARN_ON(ret))
255 return ret;
256 } else if (!pt) {
		e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index);
258 } else {
259 e->val64 = *((u64 *)pt + index);
260 }
261 return 0;
262}
263
264static inline int gtt_set_entry64(void *pt,
265 struct intel_gvt_gtt_entry *e,
266 unsigned long index, bool hypervisor_access, unsigned long gpa,
267 struct intel_vgpu *vgpu)
268{
269 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
270 int ret;
271
272 if (WARN_ON(info->gtt_entry_size != 8))
273 return -EINVAL;
274
275 if (hypervisor_access) {
		ret = intel_gvt_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
279 if (WARN_ON(ret))
280 return ret;
281 } else if (!pt) {
		write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
283 } else {
284 *((u64 *)pt + index) = e->val64;
285 }
286 return 0;
287}
288
289#define GTT_HAW 46
290
291#define ADDR_1G_MASK GENMASK_ULL(GTT_HAW - 1, 30)
292#define ADDR_2M_MASK GENMASK_ULL(GTT_HAW - 1, 21)
293#define ADDR_64K_MASK GENMASK_ULL(GTT_HAW - 1, 16)
294#define ADDR_4K_MASK GENMASK_ULL(GTT_HAW - 1, 12)
295
296#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
297#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */
298
299#define GTT_64K_PTE_STRIDE 16
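/*
 * A 64K page covers 16 consecutive 4K PTE slots, so only every 16th
 * PTE index (0, 16, 32, ...) carries a meaningful 64K entry.
 */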
300
301static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
302{
303 unsigned long pfn;
304
305 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
306 pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
307 else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
308 pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
309 else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
310 pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
311 else
312 pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
313 return pfn;
314}
315
316static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
317{
318 if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
319 e->val64 &= ~ADDR_1G_MASK;
320 pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
321 } else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
322 e->val64 &= ~ADDR_2M_MASK;
323 pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
324 } else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
325 e->val64 &= ~ADDR_64K_MASK;
326 pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
327 } else {
328 e->val64 &= ~ADDR_4K_MASK;
329 pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
330 }
331
332 e->val64 |= (pfn << PAGE_SHIFT);
333}
334
335static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
336{
337 return !!(e->val64 & _PAGE_PSE);
338}
339
340static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
341{
342 if (gen8_gtt_test_pse(e)) {
343 switch (e->type) {
344 case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
345 e->val64 &= ~_PAGE_PSE;
346 e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
347 break;
348 case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
349 e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
350 e->val64 &= ~_PAGE_PSE;
351 break;
352 default:
353 WARN_ON(1);
354 }
355 }
356}
357
358static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
359{
360 if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
361 return false;
362
363 return !!(e->val64 & GEN8_PDE_IPS_64K);
364}
365
366static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
367{
368 if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
369 return;
370
371 e->val64 &= ~GEN8_PDE_IPS_64K;
372}
373
374static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
375{
	/*
	 * i915 writes PDP root pointer registers without setting the
	 * present bit, and that still works, so root pointer entries
	 * need special treatment: any non-zero value counts as present.
	 */
381 if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
382 || e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
383 return (e->val64 != 0);
384 else
385 return (e->val64 & GEN8_PAGE_PRESENT);
386}
387
388static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
389{
390 e->val64 &= ~GEN8_PAGE_PRESENT;
391}
392
393static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
394{
395 e->val64 |= GEN8_PAGE_PRESENT;
396}
397
398static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
399{
400 return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
401}
402
403static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
404{
405 e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
406}
407
408static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
409{
410 e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
411}
412
413/*
414 * Per-platform GMA routines.
415 */
416static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
417{
418 unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);
419
	trace_gma_index(__func__, gma, x);
421 return x;
422}
423
424#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
425static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
426{ \
427 unsigned long x = (exp); \
428 trace_gma_index(__func__, gma, x); \
429 return x; \
430}
431
432DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
433DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
434DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
435DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
436DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
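/*
 * Example gen8 graphics memory address (GMA) layout, as encoded by the
 * extractors above:
 *   bits 47..39  PML4 index
 *   bits 38..30  PDP index (an L3 root only uses bits 31..30)
 *   bits 29..21  PDE index
 *   bits 20..12  PTE index
 *   bits 11..0   offset within the 4K page
 */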
437
438static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
439 .get_entry = gtt_get_entry64,
440 .set_entry = gtt_set_entry64,
441 .clear_present = gtt_entry_clear_present,
442 .set_present = gtt_entry_set_present,
443 .test_present = gen8_gtt_test_present,
444 .test_pse = gen8_gtt_test_pse,
445 .clear_pse = gen8_gtt_clear_pse,
446 .clear_ips = gen8_gtt_clear_ips,
447 .test_ips = gen8_gtt_test_ips,
448 .clear_64k_splited = gen8_gtt_clear_64k_splited,
449 .set_64k_splited = gen8_gtt_set_64k_splited,
450 .test_64k_splited = gen8_gtt_test_64k_splited,
451 .get_pfn = gen8_gtt_get_pfn,
452 .set_pfn = gen8_gtt_set_pfn,
453};
454
455static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
456 .gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
457 .gma_to_pte_index = gen8_gma_to_pte_index,
458 .gma_to_pde_index = gen8_gma_to_pde_index,
459 .gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
460 .gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
461 .gma_to_pml4_index = gen8_gma_to_pml4_index,
462};
463
464/* Update entry type per pse and ips bit. */
465static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops,
466 struct intel_gvt_gtt_entry *entry, bool ips)
467{
468 switch (entry->type) {
469 case GTT_TYPE_PPGTT_PDE_ENTRY:
470 case GTT_TYPE_PPGTT_PDP_ENTRY:
471 if (pte_ops->test_pse(entry))
			entry->type = get_pse_type(entry->type);
473 break;
474 case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
475 if (ips)
			entry->type = get_pse_type(entry->type);
477 break;
478 default:
479 GEM_BUG_ON(!gtt_type_is_entry(entry->type));
480 }
481
482 GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
483}
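/*
 * Example: a GTT_TYPE_PPGTT_PDE_ENTRY with PSE set is re-typed to
 * GTT_TYPE_PPGTT_PTE_2M_ENTRY, and a 4K PTE whose parent PDE has IPS
 * set becomes GTT_TYPE_PPGTT_PTE_64K_ENTRY, per gtt_type_table above.
 */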
484
485/*
486 * MM helpers.
487 */
488static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
489 struct intel_gvt_gtt_entry *entry, unsigned long index,
490 bool guest)
491{
492 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
493
494 GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);
495
496 entry->type = mm->ppgtt_mm.root_entry_type;
497 pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
498 mm->ppgtt_mm.shadow_pdps,
499 entry, index, false, 0, mm->vgpu);
	update_entry_type_for_real(pte_ops, entry, false);
501}
502
503static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
504 struct intel_gvt_gtt_entry *entry, unsigned long index)
505{
	_ppgtt_get_root_entry(mm, entry, index, true);
507}
508
509static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
510 struct intel_gvt_gtt_entry *entry, unsigned long index)
511{
	_ppgtt_get_root_entry(mm, entry, index, false);
513}
514
515static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
516 struct intel_gvt_gtt_entry *entry, unsigned long index,
517 bool guest)
518{
519 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
520
521 pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
522 mm->ppgtt_mm.shadow_pdps,
523 entry, index, false, 0, mm->vgpu);
524}
525
526static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
527 struct intel_gvt_gtt_entry *entry, unsigned long index)
528{
	_ppgtt_set_root_entry(mm, entry, index, false);
530}
531
532static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
533 struct intel_gvt_gtt_entry *entry, unsigned long index)
534{
535 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
536
537 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
538
539 entry->type = GTT_TYPE_GGTT_PTE;
540 pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
541 false, 0, mm->vgpu);
542}
543
544static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
545 struct intel_gvt_gtt_entry *entry, unsigned long index)
546{
547 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
548
549 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
550
551 pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
552 false, 0, mm->vgpu);
553}
554
555static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
556 struct intel_gvt_gtt_entry *entry, unsigned long index)
557{
558 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
559
560 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
561
562 pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
563}
564
565static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
566 struct intel_gvt_gtt_entry *entry, unsigned long index)
567{
568 const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
569 unsigned long offset = index;
570
571 GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
572
573 if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
574 offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
575 mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64;
576 } else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
577 offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
578 mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64;
579 }
580
581 pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
582}
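/*
 * host_ggtt_aperture/host_ggtt_hidden mirror what was last written to the
 * physical GGTT for this vGPU, so those entries can be replayed later
 * (e.g. when restoring the host GGTT after a reset or suspend/resume).
 */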
583
584/*
585 * PPGTT shadow page table helpers.
586 */
587static inline int ppgtt_spt_get_entry(
588 struct intel_vgpu_ppgtt_spt *spt,
589 void *page_table, int type,
590 struct intel_gvt_gtt_entry *e, unsigned long index,
591 bool guest)
592{
593 struct intel_gvt *gvt = spt->vgpu->gvt;
594 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
595 int ret;
596
597 e->type = get_entry_type(type);
598
599 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
600 return -EINVAL;
601
602 ret = ops->get_entry(page_table, e, index, guest,
603 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
604 spt->vgpu);
605 if (ret)
606 return ret;
607
	update_entry_type_for_real(ops, e, guest ?
609 spt->guest_page.pde_ips : false);
610
611 gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
612 type, e->type, index, e->val64);
613 return 0;
614}
615
616static inline int ppgtt_spt_set_entry(
617 struct intel_vgpu_ppgtt_spt *spt,
618 void *page_table, int type,
619 struct intel_gvt_gtt_entry *e, unsigned long index,
620 bool guest)
621{
622 struct intel_gvt *gvt = spt->vgpu->gvt;
623 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
624
625 if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
626 return -EINVAL;
627
628 gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
629 type, e->type, index, e->val64);
630
631 return ops->set_entry(page_table, e, index, guest,
632 spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
633 spt->vgpu);
634}
635
636#define ppgtt_get_guest_entry(spt, e, index) \
637 ppgtt_spt_get_entry(spt, NULL, \
638 spt->guest_page.type, e, index, true)
639
640#define ppgtt_set_guest_entry(spt, e, index) \
641 ppgtt_spt_set_entry(spt, NULL, \
642 spt->guest_page.type, e, index, true)
643
644#define ppgtt_get_shadow_entry(spt, e, index) \
645 ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
646 spt->shadow_page.type, e, index, false)
647
648#define ppgtt_set_shadow_entry(spt, e, index) \
649 ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
650 spt->shadow_page.type, e, index, false)
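/*
 * The *_guest_entry() helpers access the guest's own page table page
 * through its GPA (page_table == NULL, guest == true), while the
 * *_shadow_entry() helpers operate on the shadow page that is actually
 * walked by the hardware.
 */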
651
652static void *alloc_spt(gfp_t gfp_mask)
653{
654 struct intel_vgpu_ppgtt_spt *spt;
655
656 spt = kzalloc(sizeof(*spt), gfp_mask);
657 if (!spt)
658 return NULL;
659
660 spt->shadow_page.page = alloc_page(gfp_mask);
661 if (!spt->shadow_page.page) {
		kfree(spt);
663 return NULL;
664 }
665 return spt;
666}
667
668static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
669{
670 __free_page(spt->shadow_page.page);
	kfree(spt);
672}
673
674static int detach_oos_page(struct intel_vgpu *vgpu,
675 struct intel_vgpu_oos_page *oos_page);
676
677static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
678{
679 struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev;
680
	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);
682
683 dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
684 DMA_BIDIRECTIONAL);
685
686 radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
687
688 if (spt->guest_page.gfn) {
689 if (spt->guest_page.oos_page)
			detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
691
		intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
693 }
694
	list_del_init(&spt->post_shadow_list);
696 free_spt(spt);
697}
698
699static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
700{
701 struct intel_vgpu_ppgtt_spt *spt, *spn;
702 struct radix_tree_iter iter;
703 LIST_HEAD(all_spt);
704 void __rcu **slot;
705
706 rcu_read_lock();
707 radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
708 spt = radix_tree_deref_slot(slot);
		list_move(&spt->post_shadow_list, &all_spt);
710 }
711 rcu_read_unlock();
712
713 list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
714 ppgtt_free_spt(spt);
715}
716
717static int ppgtt_handle_guest_write_page_table_bytes(
718 struct intel_vgpu_ppgtt_spt *spt,
719 u64 pa, void *p_data, int bytes);
720
721static int ppgtt_write_protection_handler(
722 struct intel_vgpu_page_track *page_track,
723 u64 gpa, void *data, int bytes)
724{
	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	return ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
736}
737
738/* Find a spt by guest gfn. */
739static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
740 struct intel_vgpu *vgpu, unsigned long gfn)
741{
742 struct intel_vgpu_page_track *track;
743
744 track = intel_vgpu_find_page_track(vgpu, gfn);
745 if (track && track->handler == ppgtt_write_protection_handler)
746 return track->priv_data;
747
748 return NULL;
749}
750
751/* Find the spt by shadow page mfn. */
752static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
753 struct intel_vgpu *vgpu, unsigned long mfn)
754{
755 return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
756}
757
758static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
759
760/* Allocate shadow page table without guest page. */
761static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
762 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
763{
764 struct device *kdev = vgpu->gvt->gt->i915->drm.dev;
765 struct intel_vgpu_ppgtt_spt *spt = NULL;
766 dma_addr_t daddr;
767 int ret;
768
769retry:
770 spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
771 if (!spt) {
		if (reclaim_one_ppgtt_mm(vgpu->gvt))
773 goto retry;
774
775 gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
777 }
778
779 spt->vgpu = vgpu;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);
782
783 /*
784 * Init shadow_page.
785 */
786 spt->shadow_page.type = type;
787 daddr = dma_map_page(kdev, spt->shadow_page.page,
788 0, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kdev, daddr)) {
790 gvt_vgpu_err("fail to map dma addr\n");
791 ret = -EINVAL;
792 goto err_free_spt;
793 }
794 spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
795 spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
796
	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
798 if (ret)
799 goto err_unmap_dma;
800
801 return spt;
802
803err_unmap_dma:
804 dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL);
805err_free_spt:
806 free_spt(spt);
	return ERR_PTR(ret);
808}
809
810/* Allocate shadow page table associated with specific gfn. */
811static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
812 struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
813 unsigned long gfn, bool guest_pde_ips)
814{
815 struct intel_vgpu_ppgtt_spt *spt;
816 int ret;
817
818 spt = ppgtt_alloc_spt(vgpu, type);
	if (IS_ERR(spt))
820 return spt;
821
822 /*
823 * Init guest_page.
824 */
	ret = intel_vgpu_register_page_track(vgpu, gfn,
			ppgtt_write_protection_handler, spt);
827 if (ret) {
828 ppgtt_free_spt(spt);
		return ERR_PTR(ret);
830 }
831
832 spt->guest_page.type = type;
833 spt->guest_page.gfn = gfn;
834 spt->guest_page.pde_ips = guest_pde_ips;
835
	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
837
838 return spt;
839}
840
841#define pt_entry_size_shift(spt) \
842 ((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
843
844#define pt_entries(spt) \
845 (I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
846
847#define for_each_present_guest_entry(spt, e, i) \
848 for (i = 0; i < pt_entries(spt); \
849 i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
850 if (!ppgtt_get_guest_entry(spt, e, i) && \
851 spt->vgpu->gvt->gtt.pte_ops->test_present(e))
852
853#define for_each_present_shadow_entry(spt, e, i) \
854 for (i = 0; i < pt_entries(spt); \
855 i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
856 if (!ppgtt_get_shadow_entry(spt, e, i) && \
857 spt->vgpu->gvt->gtt.pte_ops->test_present(e))
858
859#define for_each_shadow_entry(spt, e, i) \
860 for (i = 0; i < pt_entries(spt); \
861 i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
862 if (!ppgtt_get_shadow_entry(spt, e, i))
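/*
 * When pde_ips is set the page holds 64K entries, so the walkers above
 * step by GTT_64K_PTE_STRIDE and only visit PTE#0, PTE#16, PTE#32, ...
 */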
863
static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
	atomic_inc(&spt->refcount);
}

static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
	return atomic_dec_return(&spt->refcount);
}
879
880static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
881
882static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
883 struct intel_gvt_gtt_entry *e)
884{
885 struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
886 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
887 struct intel_vgpu_ppgtt_spt *s;
888 enum intel_gvt_gtt_type cur_pt_type;
889
890 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
891
892 if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
893 && e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		cur_pt_type = get_next_pt_type(e->type);
895
896 if (!gtt_type_is_pt(cur_pt_type) ||
897 !gtt_type_is_pt(cur_pt_type + 1)) {
898 drm_WARN(&i915->drm, 1,
899 "Invalid page table type, cur_pt_type is: %d\n",
900 cur_pt_type);
901 return -EINVAL;
902 }
903
904 cur_pt_type += 1;
905
906 if (ops->get_pfn(e) ==
907 vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
908 return 0;
909 }
	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
911 if (!s) {
912 gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
913 ops->get_pfn(e));
914 return -ENXIO;
915 }
	return ppgtt_invalidate_spt(s);
917}
918
919static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
920 struct intel_gvt_gtt_entry *entry)
921{
922 struct intel_vgpu *vgpu = spt->vgpu;
923 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
924 unsigned long pfn;
925 int type;
926
927 pfn = ops->get_pfn(entry);
928 type = spt->shadow_page.type;
929
930 /* Uninitialized spte or unshadowed spte. */
931 if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
932 return;
933
	intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
935}
936
937static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
938{
939 struct intel_vgpu *vgpu = spt->vgpu;
940 struct intel_gvt_gtt_entry e;
941 unsigned long index;
942 int ret;
943
	trace_spt_change(spt->vgpu->id, "die", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
946
947 if (ppgtt_put_spt(spt) > 0)
948 return 0;
949
950 for_each_present_shadow_entry(spt, &e, index) {
951 switch (e.type) {
952 case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
953 gvt_vdbg_mm("invalidate 4K entry\n");
			ppgtt_invalidate_pte(spt, &e);
955 break;
956 case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
			/* We don't set up 64K shadow entries so far. */
958 WARN(1, "suspicious 64K gtt entry\n");
959 continue;
960 case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
961 gvt_vdbg_mm("invalidate 2M entry\n");
962 continue;
963 case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
964 WARN(1, "GVT doesn't support 1GB page\n");
965 continue;
966 case GTT_TYPE_PPGTT_PML4_ENTRY:
967 case GTT_TYPE_PPGTT_PDP_ENTRY:
968 case GTT_TYPE_PPGTT_PDE_ENTRY:
			gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
			ret = ppgtt_invalidate_spt_by_shadow_entry(
					spt->vgpu, &e);
972 if (ret)
973 goto fail;
974 break;
975 default:
976 GEM_BUG_ON(1);
977 }
978 }
979
	trace_spt_change(spt->vgpu->id, "release", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
982 ppgtt_free_spt(spt);
983 return 0;
984fail:
985 gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
986 spt, e.val64, e.type);
987 return ret;
988}
989
990static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
991{
992 struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
993
994 if (GRAPHICS_VER(dev_priv) == 9) {
995 u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
996 GAMW_ECO_ENABLE_64K_IPS_FIELD;
997
998 return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
999 } else if (GRAPHICS_VER(dev_priv) >= 11) {
1000 /* 64K paging only controlled by IPS bit in PTE now. */
1001 return true;
1002 } else
1003 return false;
1004}
1005
1006static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
1007
1008static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
1009 struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
1010{
1011 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1012 struct intel_vgpu_ppgtt_spt *spt = NULL;
1013 bool ips = false;
1014 int ret;
1015
1016 GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));
1017
1018 if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1019 ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
1020
	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
1022 if (spt) {
1023 ppgtt_get_spt(spt);
1024
1025 if (ips != spt->guest_page.pde_ips) {
1026 spt->guest_page.pde_ips = ips;
1027
1028 gvt_dbg_mm("reshadow PDE since ips changed\n");
			clear_page(spt->shadow_page.vaddr);
1030 ret = ppgtt_populate_spt(spt);
1031 if (ret) {
1032 ppgtt_put_spt(spt);
1033 goto err;
1034 }
1035 }
1036 } else {
		int type = get_next_pt_type(we->type);
1038
1039 if (!gtt_type_is_pt(type)) {
1040 ret = -EINVAL;
1041 goto err;
1042 }
1043
		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
1047 goto err;
1048 }
1049
		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
1051 if (ret)
1052 goto err_free_spt;
1053
1054 ret = ppgtt_populate_spt(spt);
1055 if (ret)
1056 goto err_free_spt;
1057
		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
				 spt->shadow_page.type);
1060 }
1061 return spt;
1062
1063err_free_spt:
1064 ppgtt_free_spt(spt);
1065 spt = NULL;
1066err:
1067 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1068 spt, we->val64, we->type);
	return ERR_PTR(ret);
1070}
1071
1072static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
1073 struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
1074{
1075 const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
1076
1077 se->type = ge->type;
1078 se->val64 = ge->val64;
1079
	/* We always split 64KB pages, so clear IPS in the shadow PDE. */
1081 if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
1082 ops->clear_ips(se);
1083
1084 ops->set_pfn(se, s->shadow_page.mfn);
1085}
1086
1087static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
1088 struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1089 struct intel_gvt_gtt_entry *se)
1090{
1091 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1092 struct intel_vgpu_ppgtt_spt *sub_spt;
1093 struct intel_gvt_gtt_entry sub_se;
1094 unsigned long start_gfn;
1095 dma_addr_t dma_addr;
1096 unsigned long sub_index;
1097 int ret;
1098
1099 gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
1100
1101 start_gfn = ops->get_pfn(se);
1102
	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
	if (IS_ERR(sub_spt))
		return PTR_ERR(sub_spt);
1106
1107 for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
						   PAGE_SIZE, &dma_addr);
1110 if (ret)
1111 goto err;
1112 sub_se.val64 = se->val64;
1113
1114 /* Copy the PAT field from PDE. */
1115 sub_se.val64 &= ~_PAGE_PAT;
1116 sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
1117
1118 ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
1119 ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
1120 }
1121
1122 /* Clear dirty field. */
1123 se->val64 &= ~_PAGE_DIRTY;
1124
1125 ops->clear_pse(se);
1126 ops->clear_ips(se);
1127 ops->set_pfn(se, sub_spt->shadow_page.mfn);
1128 ppgtt_set_shadow_entry(spt, se, index);
1129 return 0;
1130err:
	/* Undo the DMA mappings created so far. */
	for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
		gvt_vdbg_mm("invalidate 4K entry\n");
		ppgtt_invalidate_pte(sub_spt, &sub_se);
1135 }
	/* Release the newly allocated spt. */
	trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
			 sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
	ppgtt_free_spt(sub_spt);
1140 return ret;
1141}
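/*
 * The net effect of split_2MB_gtt_entry(): the guest's single 2M entry is
 * backed by a shadow PTE page whose 512 4K entries map the same guest
 * range, used when the host cannot map the 2M page directly.
 */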
1142
1143static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
1144 struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1145 struct intel_gvt_gtt_entry *se)
1146{
1147 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1148 struct intel_gvt_gtt_entry entry = *se;
1149 unsigned long start_gfn;
1150 dma_addr_t dma_addr;
1151 int i, ret;
1152
1153 gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);
1154
1155 GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);
1156
1157 start_gfn = ops->get_pfn(se);
1158
1159 entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
1160 ops->set_64k_splited(&entry);
1161
1162 for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i,
						   PAGE_SIZE, &dma_addr);
1165 if (ret)
1166 return ret;
1167
1168 ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
1169 ppgtt_set_shadow_entry(spt, &entry, index + i);
1170 }
1171 return 0;
1172}
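/*
 * Similarly, a guest 64K entry becomes 16 shadow 4K entries, each tagged
 * with GTT_SPTE_FLAG_64K_SPLITED so later teardown knows they belong to
 * one 64K mapping.
 */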
1173
1174static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
1175 struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
1176 struct intel_gvt_gtt_entry *ge)
1177{
1178 const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
1179 struct intel_gvt_gtt_entry se = *ge;
1180 unsigned long gfn;
1181 dma_addr_t dma_addr;
1182 int ret;
1183
1184 if (!pte_ops->test_present(ge))
1185 return 0;
1186
1187 gfn = pte_ops->get_pfn(ge);
1188
1189 switch (ge->type) {
1190 case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
1191 gvt_vdbg_mm("shadow 4K gtt entry\n");
		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
1193 if (ret)
1194 return -ENXIO;
1195 break;
1196 case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
1197 gvt_vdbg_mm("shadow 64K gtt entry\n");
		/*
		 * The layout of a 64K page is special: the page size is
		 * controlled by the upper PDE. To keep things simple, we
		 * always split a 64K page into 4K pages in the shadow PT.
		 */
		return split_64KB_gtt_entry(vgpu, spt, index, &se);
1204 case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
1205 gvt_vdbg_mm("shadow 2M gtt entry\n");
1206 if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
1207 intel_gvt_dma_map_guest_page(vgpu, gfn,
					     I915_GTT_PAGE_SIZE_2M, &dma_addr))
			return split_2MB_gtt_entry(vgpu, spt, index, &se);
1210 break;
1211 case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
1212 gvt_vgpu_err("GVT doesn't support 1GB entry\n");
1213 return -EINVAL;
1214 default:
1215 GEM_BUG_ON(1);
1216 return -EINVAL;
1217 }
1218
1219 /* Successfully shadowed a 4K or 2M page (without splitting). */
1220 pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
1221 ppgtt_set_shadow_entry(spt, &se, index);
1222 return 0;
1223}
1224
1225static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
1226{
1227 struct intel_vgpu *vgpu = spt->vgpu;
1228 struct intel_vgpu_ppgtt_spt *s;
1229 struct intel_gvt_gtt_entry se, ge;
1230 unsigned long i;
1231 int ret;
1232
	trace_spt_change(spt->vgpu->id, "born", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
1235
1236 for_each_present_guest_entry(spt, &ge, i) {
1237 if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
			if (IS_ERR(s)) {
				ret = PTR_ERR(s);
1241 goto fail;
1242 }
1243 ppgtt_get_shadow_entry(spt, &se, i);
			ppgtt_generate_shadow_entry(&se, s, &ge);
1245 ppgtt_set_shadow_entry(spt, &se, i);
1246 } else {
			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
1248 if (ret)
1249 goto fail;
1250 }
1251 }
1252 return 0;
1253fail:
1254 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1255 spt, ge.val64, ge.type);
1256 return ret;
1257}
1258
1259static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
1260 struct intel_gvt_gtt_entry *se, unsigned long index)
1261{
1262 struct intel_vgpu *vgpu = spt->vgpu;
1263 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1264 int ret;
1265
	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
			       spt->shadow_page.type, se->val64, index);
1268
1269 gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
1270 se->type, index, se->val64);
1271
1272 if (!ops->test_present(se))
1273 return 0;
1274
1275 if (ops->get_pfn(se) ==
1276 vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
1277 return 0;
1278
1279 if (gtt_type_is_pt(get_next_pt_type(se->type))) {
1280 struct intel_vgpu_ppgtt_spt *s =
			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
1282 if (!s) {
1283 gvt_vgpu_err("fail to find guest page\n");
1284 ret = -ENXIO;
1285 goto fail;
1286 }
		ret = ppgtt_invalidate_spt(s);
1288 if (ret)
1289 goto fail;
1290 } else {
		/* We don't set up 64K shadow entries so far. */
1292 WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
1293 "suspicious 64K entry\n");
		ppgtt_invalidate_pte(spt, se);
1295 }
1296
1297 return 0;
1298fail:
1299 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
1300 spt, se->val64, se->type);
1301 return ret;
1302}
1303
1304static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
1305 struct intel_gvt_gtt_entry *we, unsigned long index)
1306{
1307 struct intel_vgpu *vgpu = spt->vgpu;
1308 struct intel_gvt_gtt_entry m;
1309 struct intel_vgpu_ppgtt_spt *s;
1310 int ret;
1311
	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
			       we->val64, index);
1314
1315 gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
1316 we->type, index, we->val64);
1317
1318 if (gtt_type_is_pt(get_next_pt_type(we->type))) {
1319 s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
1322 goto fail;
1323 }
1324 ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
1326 ppgtt_set_shadow_entry(spt, &m, index);
1327 } else {
		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
1329 if (ret)
1330 goto fail;
1331 }
1332 return 0;
1333fail:
1334 gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
1335 spt, we->val64, we->type);
1336 return ret;
1337}
1338
1339static int sync_oos_page(struct intel_vgpu *vgpu,
1340 struct intel_vgpu_oos_page *oos_page)
1341{
1342 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1343 struct intel_gvt *gvt = vgpu->gvt;
1344 const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
1345 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1346 struct intel_gvt_gtt_entry old, new;
1347 int index;
1348 int ret;
1349
	trace_oos_change(vgpu->id, "sync", oos_page->id,
			 spt, spt->guest_page.type);
1352
	old.type = new.type = get_entry_type(spt->guest_page.type);
1354 old.val64 = new.val64 = 0;
1355
1356 for (index = 0; index < (I915_GTT_PAGE_SIZE >>
1357 info->gtt_entry_size_shift); index++) {
1358 ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
1359 ops->get_entry(NULL, &new, index, true,
1360 spt->guest_page.gfn << PAGE_SHIFT, vgpu);
1361
1362 if (old.val64 == new.val64
		    && !test_and_clear_bit(index, spt->post_shadow_bitmap))
1364 continue;
1365
		trace_oos_sync(vgpu->id, oos_page->id,
			       spt, spt->guest_page.type,
			       new.val64, index);
1369
		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
1371 if (ret)
1372 return ret;
1373
1374 ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
1375 }
1376
1377 spt->guest_page.write_cnt = 0;
	list_del_init(&spt->post_shadow_list);
1379 return 0;
1380}
1381
1382static int detach_oos_page(struct intel_vgpu *vgpu,
1383 struct intel_vgpu_oos_page *oos_page)
1384{
1385 struct intel_gvt *gvt = vgpu->gvt;
1386 struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
1387
	trace_oos_change(vgpu->id, "detach", oos_page->id,
			 spt, spt->guest_page.type);
1390
1391 spt->guest_page.write_cnt = 0;
1392 spt->guest_page.oos_page = NULL;
1393 oos_page->spt = NULL;
1394
	list_del_init(&oos_page->vm_list);
	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
1397
1398 return 0;
1399}
1400
1401static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
1402 struct intel_vgpu_ppgtt_spt *spt)
1403{
1404 struct intel_gvt *gvt = spt->vgpu->gvt;
1405 int ret;
1406
	ret = intel_gvt_read_gpa(spt->vgpu,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			oos_page->mem, I915_GTT_PAGE_SIZE);
1410 if (ret)
1411 return ret;
1412
1413 oos_page->spt = spt;
1414 spt->guest_page.oos_page = oos_page;
1415
	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
1417
	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
			 spt, spt->guest_page.type);
1420 return 0;
1421}
1422
1423static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
1424{
1425 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1426 int ret;
1427
	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
1429 if (ret)
1430 return ret;
1431
	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
			 spt, spt->guest_page.type);
1434
	list_del_init(&oos_page->vm_list);
	return sync_oos_page(spt->vgpu, oos_page);
1437}
1438
1439static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
1440{
1441 struct intel_gvt *gvt = spt->vgpu->gvt;
1442 struct intel_gvt_gtt *gtt = &gvt->gtt;
1443 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1444 int ret;
1445
	WARN(oos_page, "shadow PPGTT page already has an oos page\n");
1447
	if (list_empty(&gtt->oos_page_free_list_head)) {
1449 oos_page = container_of(gtt->oos_page_use_list_head.next,
1450 struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
1452 if (ret)
1453 return ret;
		ret = detach_oos_page(spt->vgpu, oos_page);
1455 if (ret)
1456 return ret;
1457 } else
1458 oos_page = container_of(gtt->oos_page_free_list_head.next,
1459 struct intel_vgpu_oos_page, list);
1460 return attach_oos_page(oos_page, spt);
1461}
1462
1463static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
1464{
1465 struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
1466
	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
1468 return -EINVAL;
1469
	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
			 spt, spt->guest_page.type);
1472
	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
1475}
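/*
 * Out-of-sync life cycle: a write-heavy PTE page gets an oos page and its
 * write protection is dropped (ppgtt_set_guest_page_oos), so guest writes
 * then land directly in guest memory. Before the next workload is
 * submitted, intel_vgpu_sync_oos_pages() re-enables protection and replays
 * the differences against the shadow page (ppgtt_set_guest_page_sync).
 */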
1476
1477/**
1478 * intel_vgpu_sync_oos_pages - sync all the out-of-synced shadow for vGPU
1479 * @vgpu: a vGPU
1480 *
1481 * This function is called before submitting a guest workload to host,
1482 * to sync all the out-of-synced shadow for vGPU
1483 *
1484 * Returns:
1485 * Zero on success, negative error code if failed.
1486 */
1487int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
1488{
1489 struct list_head *pos, *n;
1490 struct intel_vgpu_oos_page *oos_page;
1491 int ret;
1492
1493 if (!enable_out_of_sync)
1494 return 0;
1495
1496 list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
1497 oos_page = container_of(pos,
1498 struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
1500 if (ret)
1501 return ret;
1502 }
1503 return 0;
1504}
1505
/*
 * The heart of the PPGTT shadow page table: handle a guest write to a
 * guest page table entry and mirror it into the shadow page table.
 */
1509static int ppgtt_handle_guest_write_page_table(
1510 struct intel_vgpu_ppgtt_spt *spt,
1511 struct intel_gvt_gtt_entry *we, unsigned long index)
1512{
1513 struct intel_vgpu *vgpu = spt->vgpu;
1514 int type = spt->shadow_page.type;
1515 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1516 struct intel_gvt_gtt_entry old_se;
1517 int new_present;
1518 int i, ret;
1519
1520 new_present = ops->test_present(we);
1521
	/*
	 * Add the new entry first and then remove the old one. This
	 * guarantees the PPGTT table remains valid during the window
	 * between the two operations.
	 */
1527 ppgtt_get_shadow_entry(spt, &old_se, index);
1528
1529 if (new_present) {
1530 ret = ppgtt_handle_guest_entry_add(spt, we, index);
1531 if (ret)
1532 goto fail;
1533 }
1534
	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
1536 if (ret)
1537 goto fail;
1538
1539 if (!new_present) {
		/* For 64KB split entries, we need to clear them all. */
1541 if (ops->test_64k_splited(&old_se) &&
1542 !(index % GTT_64K_PTE_STRIDE)) {
			gvt_vdbg_mm("remove split 64K shadow entries\n");
1544 for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
1545 ops->clear_64k_splited(&old_se);
1546 ops->set_pfn(&old_se,
1547 vgpu->gtt.scratch_pt[type].page_mfn);
1548 ppgtt_set_shadow_entry(spt, &old_se, index + i);
1549 }
1550 } else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
1551 old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
1552 ops->clear_pse(&old_se);
1553 ops->set_pfn(&old_se,
1554 vgpu->gtt.scratch_pt[type].page_mfn);
1555 ppgtt_set_shadow_entry(spt, &old_se, index);
1556 } else {
1557 ops->set_pfn(&old_se,
1558 vgpu->gtt.scratch_pt[type].page_mfn);
1559 ppgtt_set_shadow_entry(spt, &old_se, index);
1560 }
1561 }
1562
1563 return 0;
1564fail:
1565 gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
1566 spt, we->val64, we->type);
1567 return ret;
1568}
1569
1570
1572static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
1573{
1574 return enable_out_of_sync
1575 && gtt_type_is_pte_pt(spt->guest_page.type)
1576 && spt->guest_page.write_cnt >= 2;
1577}
1578
1579static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
1580 unsigned long index)
1581{
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
1584 return;
1585
	list_add_tail(&spt->post_shadow_list,
		      &spt->vgpu->gtt.post_shadow_list_head);
1588}
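/*
 * Partial (smaller than entry-sized) guest writes are only recorded in
 * post_shadow_bitmap here; the real shadow update is deferred to
 * intel_vgpu_flush_post_shadow() once the guest has finished assembling
 * the entry.
 */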
1589
1590/**
1591 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
1592 * @vgpu: a vGPU
1593 *
1594 * This function is called before submitting a guest workload to host,
1595 * to flush all the post shadows for a vGPU.
1596 *
1597 * Returns:
1598 * Zero on success, negative error code if failed.
1599 */
1600int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
1601{
1602 struct list_head *pos, *n;
1603 struct intel_vgpu_ppgtt_spt *spt;
1604 struct intel_gvt_gtt_entry ge;
1605 unsigned long index;
1606 int ret;
1607
1608 list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
1609 spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
1610 post_shadow_list);
1611
1612 for_each_set_bit(index, spt->post_shadow_bitmap,
1613 GTT_ENTRY_NUM_IN_ONE_PAGE) {
1614 ppgtt_get_guest_entry(spt, &ge, index);
1615
			ret = ppgtt_handle_guest_write_page_table(spt,
							&ge, index);
1618 if (ret)
1619 return ret;
			clear_bit(index, spt->post_shadow_bitmap);
1621 }
		list_del_init(&spt->post_shadow_list);
1623 }
1624 return 0;
1625}
1626
1627static int ppgtt_handle_guest_write_page_table_bytes(
1628 struct intel_vgpu_ppgtt_spt *spt,
1629 u64 pa, void *p_data, int bytes)
1630{
1631 struct intel_vgpu *vgpu = spt->vgpu;
1632 const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
1633 const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
1634 struct intel_gvt_gtt_entry we, se;
1635 unsigned long index;
1636 int ret;
1637
1638 index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
1639
1640 ppgtt_get_guest_entry(spt, &we, index);
1641
	/*
	 * For a page table holding 64K gtt entries, only PTE#0, PTE#16,
	 * PTE#32, ... PTE#496 are used. Writes to the unused PTEs should
	 * be ignored.
	 */
1647 if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
1648 (index % GTT_64K_PTE_STRIDE)) {
1649 gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
1650 index);
1651 return 0;
1652 }
1653
1654 if (bytes == info->gtt_entry_size) {
		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
1656 if (ret)
1657 return ret;
1658 } else {
1659 if (!test_bit(index, spt->post_shadow_bitmap)) {
1660 int type = spt->shadow_page.type;
1661
1662 ppgtt_get_shadow_entry(spt, &se, index);
			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
1664 if (ret)
1665 return ret;
1666 ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
1667 ppgtt_set_shadow_entry(spt, &se, index);
1668 }
1669 ppgtt_set_post_shadow(spt, index);
1670 }
1671
1672 if (!enable_out_of_sync)
1673 return 0;
1674
1675 spt->guest_page.write_cnt++;
1676
1677 if (spt->guest_page.oos_page)
1678 ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
1679 false, 0, vgpu);
1680
1681 if (can_do_out_of_sync(spt)) {
1682 if (!spt->guest_page.oos_page)
1683 ppgtt_allocate_oos_page(spt);
1684
1685 ret = ppgtt_set_guest_page_oos(spt);
1686 if (ret < 0)
1687 return ret;
1688 }
1689 return 0;
1690}
1691
1692static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
1693{
1694 struct intel_vgpu *vgpu = mm->vgpu;
1695 struct intel_gvt *gvt = vgpu->gvt;
1696 struct intel_gvt_gtt *gtt = &gvt->gtt;
1697 const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1698 struct intel_gvt_gtt_entry se;
1699 int index;
1700
1701 if (!mm->ppgtt_mm.shadowed)
1702 return;
1703
1704 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
		ppgtt_get_shadow_root_entry(mm, &se, index);
1706
1707 if (!ops->test_present(&se))
1708 continue;
1709
		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
		se.val64 = 0;
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "destroy root pointer",
				       NULL, se.type, se.val64, index);
1716 }
1717
1718 mm->ppgtt_mm.shadowed = false;
1719}
1720
1722static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
1723{
1724 struct intel_vgpu *vgpu = mm->vgpu;
1725 struct intel_gvt *gvt = vgpu->gvt;
1726 struct intel_gvt_gtt *gtt = &gvt->gtt;
1727 const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1728 struct intel_vgpu_ppgtt_spt *spt;
1729 struct intel_gvt_gtt_entry ge, se;
1730 int index, ret;
1731
1732 if (mm->ppgtt_mm.shadowed)
1733 return 0;
1734
1735 if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
1736 return -EINVAL;
1737
1738 mm->ppgtt_mm.shadowed = true;
1739
1740 for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
		ppgtt_get_guest_root_entry(mm, &ge, index);
1742
1743 if (!ops->test_present(&ge))
1744 continue;
1745
		trace_spt_guest_change(vgpu->id, __func__, NULL,
				       ge.type, ge.val64, index);
1748
		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
		if (IS_ERR(spt)) {
			gvt_vgpu_err("fail to populate guest root pointer\n");
			ret = PTR_ERR(spt);
1753 goto fail;
1754 }
		ppgtt_generate_shadow_entry(&se, spt, &ge);
		ppgtt_set_shadow_root_entry(mm, &se, index);

		trace_spt_guest_change(vgpu->id, "populate root pointer",
				       NULL, se.type, se.val64, index);
1760 }
1761
1762 return 0;
1763fail:
1764 invalidate_ppgtt_mm(mm);
1765 return ret;
1766}
1767
1768static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
1769{
1770 struct intel_vgpu_mm *mm;
1771
1772 mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1773 if (!mm)
1774 return NULL;
1775
1776 mm->vgpu = vgpu;
	kref_init(&mm->ref);
	atomic_set(&mm->pincount, 0);
1779
1780 return mm;
1781}
1782
1783static void vgpu_free_mm(struct intel_vgpu_mm *mm)
1784{
	kfree(mm);
1786}
1787
1788/**
1789 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
1790 * @vgpu: a vGPU
1791 * @root_entry_type: ppgtt root entry type
1792 * @pdps: guest pdps.
1793 *
1794 * This function is used to create a ppgtt mm object for a vGPU.
1795 *
1796 * Returns:
 * The new ppgtt mm object on success, or an ERR_PTR-encoded negative
 * error code on failure.
1798 */
1799struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
1800 enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
1801{
1802 struct intel_gvt *gvt = vgpu->gvt;
1803 struct intel_vgpu_mm *mm;
1804 int ret;
1805
1806 mm = vgpu_alloc_mm(vgpu);
1807 if (!mm)
		return ERR_PTR(-ENOMEM);
1809
1810 mm->type = INTEL_GVT_MM_PPGTT;
1811
1812 GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
1813 root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
1814 mm->ppgtt_mm.root_entry_type = root_entry_type;
1815
	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
	INIT_LIST_HEAD(&mm->ppgtt_mm.link);
1819
1820 if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
1821 mm->ppgtt_mm.guest_pdps[0] = pdps[0];
1822 else
1823 memcpy(mm->ppgtt_mm.guest_pdps, pdps,
1824 sizeof(mm->ppgtt_mm.guest_pdps));
1825
1826 ret = shadow_ppgtt_mm(mm);
1827 if (ret) {
1828 gvt_vgpu_err("failed to shadow ppgtt mm\n");
1829 vgpu_free_mm(mm);
		return ERR_PTR(ret);
1831 }
1832
	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
1834
1835 mutex_lock(&gvt->gtt.ppgtt_mm_lock);
	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1838
1839 return mm;
1840}
1841
1842static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
1843{
1844 struct intel_vgpu_mm *mm;
1845 unsigned long nr_entries;
1846
1847 mm = vgpu_alloc_mm(vgpu);
1848 if (!mm)
		return ERR_PTR(-ENOMEM);
1850
1851 mm->type = INTEL_GVT_MM_GGTT;
1852
1853 nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
1854 mm->ggtt_mm.virtual_ggtt =
1855 vzalloc(array_size(nr_entries,
1856 vgpu->gvt->device_info.gtt_entry_size));
1857 if (!mm->ggtt_mm.virtual_ggtt) {
1858 vgpu_free_mm(mm);
1859 return ERR_PTR(error: -ENOMEM);
1860 }
1861
1862 mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1863 if (!mm->ggtt_mm.host_ggtt_aperture) {
1864 vfree(addr: mm->ggtt_mm.virtual_ggtt);
1865 vgpu_free_mm(mm);
1866 return ERR_PTR(error: -ENOMEM);
1867 }
1868
1869 mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1870 if (!mm->ggtt_mm.host_ggtt_hidden) {
1871 vfree(addr: mm->ggtt_mm.host_ggtt_aperture);
1872 vfree(addr: mm->ggtt_mm.virtual_ggtt);
1873 vgpu_free_mm(mm);
1874 return ERR_PTR(error: -ENOMEM);
1875 }
1876
1877 return mm;
1878}
1879
1880/**
1881 * _intel_vgpu_mm_release - destroy a mm object
1882 * @mm_ref: a kref object
1883 *
1884 * This function is used to destroy a mm object for vGPU
1885 *
1886 */
1887void _intel_vgpu_mm_release(struct kref *mm_ref)
1888{
1889 struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1890
1891 if (GEM_WARN_ON(atomic_read(&mm->pincount)))
1892 gvt_err("vgpu mm pin count bug detected\n");
1893
1894 if (mm->type == INTEL_GVT_MM_PPGTT) {
1895 list_del(entry: &mm->ppgtt_mm.list);
1896
1897 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1898 list_del(entry: &mm->ppgtt_mm.lru_list);
1899 mutex_unlock(lock: &mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1900
1901 invalidate_ppgtt_mm(mm);
1902 } else {
1903 vfree(addr: mm->ggtt_mm.virtual_ggtt);
1904 vfree(addr: mm->ggtt_mm.host_ggtt_aperture);
1905 vfree(addr: mm->ggtt_mm.host_ggtt_hidden);
1906 }
1907
1908 vgpu_free_mm(mm);
1909}
1910
1911/**
1912 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
1913 * @mm: a vGPU mm object
1914 *
1915 * This function is called when user doesn't want to use a vGPU mm object
1916 */
1917void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
1918{
1919 atomic_dec_if_positive(v: &mm->pincount);
1920}
1921
1922/**
1923 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
1924 * @mm: target vgpu mm
1925 *
1926 * This function is called when user wants to use a vGPU mm object. If this
1927 * mm object hasn't been shadowed yet, the shadow will be populated at this
1928 * time.
1929 *
1930 * Returns:
1931 * Zero on success, negative error code if failed.
1932 */
1933int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
1934{
1935 int ret;
1936
1937 atomic_inc(v: &mm->pincount);
1938
1939 if (mm->type == INTEL_GVT_MM_PPGTT) {
1940 ret = shadow_ppgtt_mm(mm);
1941 if (ret)
1942 return ret;
1943
1944 mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1945 list_move_tail(list: &mm->ppgtt_mm.lru_list,
1946 head: &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
1947 mutex_unlock(lock: &mm->vgpu->gvt->gtt.ppgtt_mm_lock);
1948 }
1949
1950 return 0;
1951}
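
/*
 * Typical pin/unpin pattern (a sketch only; the surrounding workload names
 * are assumptions): a user such as the workload scheduler pins the mm while
 * work that walks its shadow page tables is in flight and unpins it on
 * retirement, which keeps reclaim_one_ppgtt_mm() below from tearing the
 * tables down underneath the hardware.
 *
 *	ret = intel_vgpu_pin_mm(workload->shadow_mm);
 *	if (ret)
 *		return ret;
 *	...	// submit the workload and wait for completion
 *	intel_vgpu_unpin_mm(workload->shadow_mm);
 */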

static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos, *n;

	mutex_lock(&gvt->gtt.ppgtt_mm_lock);

	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);

		if (atomic_read(&mm->pincount))
			continue;

		list_del_init(&mm->ppgtt_mm.lru_list);
		mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
		invalidate_ppgtt_mm(mm);
		return 1;
	}
	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
	return 0;
}

/*
 * GMA translation APIs.
 */
static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;

	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s)
		return -ENXIO;

	if (!guest)
		ppgtt_get_shadow_entry(s, e, index);
	else
		ppgtt_get_guest_entry(s, e, index);
	return 0;
}

/**
 * intel_vgpu_gma_to_gpa - translate a gma to GPA
 * @mm: mm object. could be a PPGTT or GGTT mm object
 * @gma: graphics memory address in this mm object
 *
 * This function is used to translate a graphics memory address in specific
 * graphics memory space to guest physical address.
 *
 * Returns:
 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
 */
unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
{
	struct intel_vgpu *vgpu = mm->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
	const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
	unsigned long gma_index[4];
	struct intel_gvt_gtt_entry e;
	int i, levels = 0;
	int ret;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
		   mm->type != INTEL_GVT_MM_PPGTT);

	if (mm->type == INTEL_GVT_MM_GGTT) {
		if (!vgpu_gmadr_is_valid(vgpu, gma))
			goto err;

		ggtt_get_guest_entry(mm, &e,
			gma_ops->gma_to_ggtt_pte_index(gma));

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
			+ (gma & ~I915_GTT_PAGE_MASK);

		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
	} else {
		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e, 0);

			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
			gma_index[2] = gma_ops->gma_to_pde_index(gma);
			gma_index[3] = gma_ops->gma_to_pte_index(gma);
			levels = 4;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			ppgtt_get_shadow_root_entry(mm, &e,
					gma_ops->gma_to_l3_pdp_index(gma));

			gma_index[0] = gma_ops->gma_to_pde_index(gma);
			gma_index[1] = gma_ops->gma_to_pte_index(gma);
			levels = 2;
			break;
		default:
			GEM_BUG_ON(1);
		}

		/* walk the shadow page table and get gpa from guest entry */
		for (i = 0; i < levels; i++) {
			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
				(i == levels - 1));
			if (ret)
				goto err;

			if (!pte_ops->test_present(&e)) {
				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
				goto err;
			}
		}

		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
			(gma & ~I915_GTT_PAGE_MASK);
		trace_gma_translate(vgpu->id, "ppgtt", 0,
				    mm->ppgtt_mm.root_entry_type, gma, gpa);
	}

	return gpa;
err:
	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
	return INTEL_GVT_INVALID_ADDR;
}
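
/*
 * Illustrative use (a sketch; the consumer shown here and its buffer names
 * are assumptions, only intel_vgpu_gma_to_gpa() comes from this file):
 * translate a guest graphics memory address to a guest physical address
 * before touching the backing memory.
 *
 *	unsigned long gpa = intel_vgpu_gma_to_gpa(mm, gma);
 *
 *	if (gpa == INTEL_GVT_INVALID_ADDR)
 *		return -EFAULT;
 *	intel_gvt_read_gpa(vgpu, gpa, buf, len);
 */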

static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
	unsigned int off, void *p_data, unsigned int bytes)
{
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	unsigned long index = off >> info->gtt_entry_size_shift;
	unsigned long gma;
	struct intel_gvt_gtt_entry e;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = index << I915_GTT_PAGE_SHIFT;
	if (!intel_gvt_ggtt_validate_range(vgpu,
					   gma, 1 << I915_GTT_PAGE_SHIFT)) {
		gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
		memset(p_data, 0, bytes);
		return 0;
	}

	ggtt_get_guest_entry(ggtt_mm, &e, index);
	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
	       bytes);
	return 0;
}

/**
 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data will be returned to guest
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register read
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
	return ret;
}
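
/*
 * Worked example of the offset arithmetic above (illustrative numbers): with
 * an 8-byte GGTT entry, gtt_entry_size_shift is 3, so a guest access at
 * gtt_start_offset + 0x10 hits GGTT index 0x10 >> 3 = 2, which covers the
 * 4KB page at graphics memory address 2 << I915_GTT_PAGE_SHIFT = 0x2000.
 */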

static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *entry)
{
	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;

	pfn = pte_ops->get_pfn(entry);
	if (pfn != vgpu->gvt->gtt.scratch_mfn)
		intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
}

static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
	void *p_data, unsigned int bytes)
{
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_device_info *info = &gvt->device_info;
	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
	unsigned long gma, gfn;
	struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
	struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
	dma_addr_t dma_addr;
	int ret;
	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
	bool partial_update = false;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;

	/* the VM may configure the whole GM space when ballooning is used */
	if (!vgpu_gmadr_is_valid(vgpu, gma))
		return 0;

	e.type = GTT_TYPE_GGTT_PTE;
	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
	       bytes);

	/* If the GGTT entry size is 8 bytes and the write is split into two
	 * 4-byte writes, save the first 4 bytes in a list and update the
	 * virtual PTE. Only update the shadow PTE when the second 4 bytes
	 * arrive.
	 */
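	/*
	 * Illustrative sequence (hypothetical values): for the 8-byte entry
	 * at offset 0x100, the guest may first write 4 bytes at 0x100 (saved
	 * on partial_pte_list and mirrored into the virtual PTE) and later
	 * write 4 bytes at 0x104; only when that second half arrives is the
	 * full 64-bit value assembled and the shadow PTE updated below.
	 */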
	if (bytes < info->gtt_entry_size) {
		bool found = false;

		list_for_each_entry_safe(pos, n,
					 &ggtt_mm->ggtt_mm.partial_pte_list, list) {
			if (g_gtt_index == pos->offset >>
					info->gtt_entry_size_shift) {
				if (off != pos->offset) {
					/* the second partial part */
					int last_off = pos->offset &
						(info->gtt_entry_size - 1);

					memcpy((void *)&e.val64 + last_off,
					       (void *)&pos->data + last_off,
					       bytes);

					list_del(&pos->list);
					kfree(pos);
					found = true;
					break;
				}

				/* update of the first partial part */
				pos->data = e.val64;
				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
				return 0;
			}
		}

		if (!found) {
			/* the first partial part */
			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
			if (!partial_pte)
				return -ENOMEM;
			partial_pte->offset = off;
			partial_pte->data = e.val64;
			list_add_tail(&partial_pte->list,
				      &ggtt_mm->ggtt_mm.partial_pte_list);
			partial_update = true;
		}
	}

	if (!partial_update && (ops->test_present(&e))) {
		gfn = ops->get_pfn(&e);
		m.val64 = e.val64;
		m.type = e.type;

		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
						   &dma_addr);
		if (ret) {
			gvt_vgpu_err("fail to populate guest ggtt entry\n");
			/* The guest driver may read/write the entry while we
			 * are partially updating it; in that situation the
			 * p2m mapping can fail, so point the shadow entry at
			 * a scratch page instead.
			 */
			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		} else
			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
	} else {
		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
		ops->clear_present(&m);
	}

	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);

	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
	ggtt_invalidate_pte(vgpu, &e);

	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
	ggtt_invalidate(gvt->gt);
	return 0;
}

/**
 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
 * @vgpu: a vGPU
 * @off: register offset
 * @p_data: data from guest write
 * @bytes: data length
 *
 * This function is used to emulate the GTT MMIO register write
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
		unsigned int off, void *p_data, unsigned int bytes)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_engine_cs *engine;
	int i;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	off -= info->gtt_start_offset;
	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);

	/* if the ggtt of the last submitted context is written,
	 * that context probably got unpinned.
	 * Set the last shadowed ctx to invalid.
	 */
	for_each_engine(engine, vgpu->gvt->gt, i) {
		if (!s->last_ctx[i].valid)
			continue;

		if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
			s->last_ctx[i].valid = false;
	}
	return ret;
}

static int alloc_scratch_pages(struct intel_vgpu *vgpu,
		enum intel_gvt_gtt_type type)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int page_entry_num = I915_GTT_PAGE_SIZE >>
				vgpu->gvt->device_info.gtt_entry_size_shift;
	void *scratch_pt;
	int i;
	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
	dma_addr_t daddr;

	if (drm_WARN_ON(&i915->drm,
			type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
		return -EINVAL;

	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
	if (!scratch_pt) {
		gvt_vgpu_err("fail to allocate scratch page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_vgpu_err("fail to dmamap scratch_pt\n");
		__free_page(virt_to_page(scratch_pt));
		return -ENOMEM;
	}
	gtt->scratch_pt[type].page_mfn =
		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
		   vgpu->id, type, gtt->scratch_pt[type].page_mfn);

	/* Build the tree by fully filling the scratch pt with entries which
	 * point to the next level scratch pt or scratch page. The
	 * scratch_pt[type] indicates the scratch pt/scratch page used by the
	 * 'type' pt.
	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by the
	 * GTT_TYPE_PPGTT_PDE_PT level pt, which means this scratch_pt itself
	 * is of type GTT_TYPE_PPGTT_PTE_PT and is fully filled with the
	 * scratch page mfn.
	 */
	if (type > GTT_TYPE_PPGTT_PTE_PT) {
		struct intel_gvt_gtt_entry se;

		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
		se.type = get_entry_type(type - 1);
		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);

		/* The entry parameters like present/writeable/cache type
		 * are set to the same as i915's scratch page tree.
		 */
		se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
		if (type == GTT_TYPE_PPGTT_PDE_PT)
			se.val64 |= PPAT_CACHED;

		for (i = 0; i < page_entry_num; i++)
			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
	}

	return 0;
}
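
/*
 * For reference, the scratch hierarchy built above chains downwards, each
 * level fully populated with entries pointing at the scratch object one
 * level below:
 *
 *	scratch_pt[GTT_TYPE_PPGTT_PML4_PT]
 *	  -> scratch_pt[GTT_TYPE_PPGTT_PDP_PT]
 *	    -> scratch_pt[GTT_TYPE_PPGTT_PDE_PT]
 *	      -> scratch_pt[GTT_TYPE_PPGTT_PTE_PT] (a zero-filled page)
 */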

static int release_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i;
	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
	dma_addr_t daddr;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		if (vgpu->gtt.scratch_pt[i].page != NULL) {
			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
					I915_GTT_PAGE_SHIFT);
			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
			__free_page(vgpu->gtt.scratch_pt[i].page);
			vgpu->gtt.scratch_pt[i].page = NULL;
			vgpu->gtt.scratch_pt[i].page_mfn = 0;
		}
	}

	return 0;
}

static int create_scratch_page_tree(struct intel_vgpu *vgpu)
{
	int i, ret;

	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
		ret = alloc_scratch_pages(vgpu, i);
		if (ret)
			goto err;
	}

	return 0;

err:
	release_scratch_page_tree(vgpu);
	return ret;
}

/**
 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to initialize per-vGPU graphics memory virtualization
 * components.
 *
 * Returns:
 * Zero on success, error code if failed.
 */
int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_gtt *gtt = &vgpu->gtt;

	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);

	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_list_head);
	INIT_LIST_HEAD(&gtt->post_shadow_list_head);

	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
	if (IS_ERR(gtt->ggtt_mm)) {
		gvt_vgpu_err("fail to create mm for ggtt.\n");
		return PTR_ERR(gtt->ggtt_mm);
	}

	intel_vgpu_reset_ggtt(vgpu, false);

	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);

	return create_scratch_page_tree(vgpu);
}
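
/*
 * Lifecycle sketch (illustrative only): the vGPU core is expected to pair
 * these calls around a vGPU's lifetime, roughly:
 *
 *	ret = intel_vgpu_init_gtt(vgpu);	// at vGPU creation
 *	if (ret)
 *		goto err;
 *	...
 *	intel_vgpu_clean_gtt(vgpu);		// at vGPU destruction
 */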

void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
		intel_vgpu_destroy_mm(mm);
	}

	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
		gvt_err("vgpu ppgtt mm is not fully destroyed\n");

	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
		gvt_err("why do we still have spt not freed?\n");
		ppgtt_free_all_spt(vgpu);
	}
}

static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
{
	struct intel_gvt_partial_pte *pos, *next;

	list_for_each_entry_safe(pos, next,
				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
				 list) {
		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
			   pos->offset, pos->data);
		kfree(pos);
	}
	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
	vgpu->gtt.ggtt_mm = NULL;
}

/**
 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
 * @vgpu: a vGPU
 *
 * This function is used to clean up per-vGPU graphics memory virtualization
 * components.
 */
void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
{
	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
	intel_vgpu_destroy_ggtt_mm(vgpu);
	release_scratch_page_tree(vgpu);
}

static void clean_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;

	WARN(!list_empty(&gtt->oos_page_use_list_head),
	     "someone is still using oos page\n");

	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
		list_del(&oos_page->list);
		free_page((unsigned long)oos_page->mem);
		kfree(oos_page);
	}
}

static int setup_spt_oos(struct intel_gvt *gvt)
{
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page;
	int i;
	int ret;

	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);

	for (i = 0; i < preallocated_oos_pages; i++) {
		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
		if (!oos_page) {
			ret = -ENOMEM;
			goto fail;
		}
		oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
		if (!oos_page->mem) {
			ret = -ENOMEM;
			kfree(oos_page);
			goto fail;
		}

		INIT_LIST_HEAD(&oos_page->list);
		INIT_LIST_HEAD(&oos_page->vm_list);
		oos_page->id = i;
		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
	}

	gvt_dbg_mm("%d oos pages preallocated\n", i);

	return 0;
fail:
	clean_spt_oos(gvt);
	return ret;
}

/**
 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
 * @vgpu: a vGPU
 * @pdps: pdp root array
 *
 * This function is used to find a PPGTT mm object from the mm object pool.
 *
 * Returns:
 * Pointer to the mm object on success, NULL if failed.
 */
struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
		u64 pdps[])
{
	struct intel_vgpu_mm *mm;
	struct list_head *pos;

	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);

		switch (mm->ppgtt_mm.root_entry_type) {
		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
				return mm;
			break;
		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
				    sizeof(mm->ppgtt_mm.guest_pdps)))
				return mm;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}
	return NULL;
}

/**
 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
 * @vgpu: a vGPU
 * @root_entry_type: ppgtt root entry type
 * @pdps: guest pdps
 *
 * This function is used to find or create a PPGTT mm object from a guest.
 *
 * Returns:
 * Pointer to the mm object on success, an error pointer encoding a negative
 * error code if failed.
 */
struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (mm) {
		intel_vgpu_mm_get(mm);
	} else {
		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
		if (IS_ERR(mm))
			gvt_vgpu_err("fail to create mm\n");
	}
	return mm;
}

/**
 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
 * @vgpu: a vGPU
 * @pdps: guest pdps
 *
 * This function is used to find a PPGTT mm object from a guest and drop a
 * reference to it.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
{
	struct intel_vgpu_mm *mm;

	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
	if (!mm) {
		gvt_vgpu_err("fail to find ppgtt instance.\n");
		return -EINVAL;
	}
	intel_vgpu_mm_put(mm);
	return 0;
}
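
/*
 * Illustrative pairing (a sketch, not taken from this file): the guest
 * notification path is expected to get the shadow PPGTT when the guest
 * announces a new page table and put it when the guest tears that table
 * down; pdps[] stands for the guest root pointers carried by the
 * notification.
 *
 *	mm = intel_vgpu_get_ppgtt_mm(vgpu, GTT_TYPE_PPGTT_ROOT_L4_ENTRY, pdps);
 *	if (IS_ERR(mm))
 *		return PTR_ERR(mm);
 *	...
 *	intel_vgpu_put_ppgtt_mm(vgpu, pdps);
 */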

/**
 * intel_gvt_init_gtt - initialize mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the initialization stage, to initialize
 * the mm components of a GVT device.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_gvt_init_gtt(struct intel_gvt *gvt)
{
	int ret;
	void *page;
	struct device *dev = gvt->gt->i915->drm.dev;
	dma_addr_t daddr;

	gvt_dbg_core("init gtt\n");

	gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
	gvt->gtt.gma_ops = &gen8_gtt_gma_ops;

	page = (void *)get_zeroed_page(GFP_KERNEL);
	if (!page) {
		gvt_err("fail to allocate scratch ggtt page\n");
		return -ENOMEM;
	}

	daddr = dma_map_page(dev, virt_to_page(page), 0,
			     4096, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr)) {
		gvt_err("fail to dmamap scratch ggtt page\n");
		__free_page(virt_to_page(page));
		return -ENOMEM;
	}

	gvt->gtt.scratch_page = virt_to_page(page);
	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);

	if (enable_out_of_sync) {
		ret = setup_spt_oos(gvt);
		if (ret) {
			gvt_err("fail to initialize SPT oos\n");
			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
			__free_page(gvt->gtt.scratch_page);
			return ret;
		}
	}
	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
	mutex_init(&gvt->gtt.ppgtt_mm_lock);
	return 0;
}

/**
 * intel_gvt_clean_gtt - clean up mm components of a GVT device
 * @gvt: GVT device
 *
 * This function is called at the driver unloading stage, to clean up
 * the mm components of a GVT device.
 *
 */
void intel_gvt_clean_gtt(struct intel_gvt *gvt)
{
	struct device *dev = gvt->gt->i915->drm.dev;
	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
					I915_GTT_PAGE_SHIFT);

	dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);

	__free_page(gvt->gtt.scratch_page);

	if (enable_out_of_sync)
		clean_spt_oos(gvt);
}

/**
 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
 * @vgpu: a vGPU
 *
 * This function is called to invalidate all PPGTT instances of a vGPU.
 *
 */
void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_mm *mm;

	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
		if (mm->type == INTEL_GVT_MM_PPGTT) {
			mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
			list_del_init(&mm->ppgtt_mm.lru_list);
			mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
			if (mm->ppgtt_mm.shadowed)
				invalidate_ppgtt_mm(mm);
		}
	}
}

/**
 * intel_vgpu_reset_ggtt - reset the GGTT entry
 * @vgpu: a vGPU
 * @invalidate_old: invalidate old entries
 *
 * This function is called at the vGPU create stage
 * to reset all the GGTT entries.
 *
 */
void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
{
	struct intel_gvt *gvt = vgpu->gvt;
	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
	struct intel_gvt_gtt_entry old_entry;
	u32 index;
	u32 num_entries;

	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
	pte_ops->set_present(&entry);

	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
	while (num_entries--) {
		if (invalidate_old) {
			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
			ggtt_invalidate_pte(vgpu, &old_entry);
		}
		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
	}

	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
	while (num_entries--) {
		if (invalidate_old) {
			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
			ggtt_invalidate_pte(vgpu, &old_entry);
		}
		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
	}

	ggtt_invalidate(gvt->gt);
}

/**
 * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
 * @gvt: intel gvt device
 *
 * This function is called at driver resume stage to restore
 * GGTT entries of every vGPU.
 *
 */
void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
{
	struct intel_vgpu *vgpu;
	struct intel_vgpu_mm *mm;
	int id;
	gen8_pte_t pte;
	u32 idx, num_low, num_hi, offset;

	/* Restore dirty host ggtt for all vGPUs */
	idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
		mm = vgpu->gtt.ggtt_mm;

		num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
		offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
		for (idx = 0; idx < num_low; idx++) {
			pte = mm->ggtt_mm.host_ggtt_aperture[idx];
			if (pte & GEN8_PAGE_PRESENT)
				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
		}

		num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
		offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
		for (idx = 0; idx < num_hi; idx++) {
			pte = mm->ggtt_mm.host_ggtt_hidden[idx];
			if (pte & GEN8_PAGE_PRESENT)
				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
		}
	}
}