1// SPDX-License-Identifier: MIT
2/*
3 * Copyright 2022 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27#include "dc.h"
28#include "../display_mode_lib.h"
29#include "display_mode_vba_314.h"
30#include "../dml_inline_defs.h"
31
32/*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
41#define BPP_INVALID 0
42#define BPP_BLENDED_PIPE 0xffffffff
43#define DCN314_MAX_DSC_IMAGE_WIDTH 5184
44#define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096
45
// For DML-C changes that haven't been propagated to VBA yet
47//#define __DML_VBA_ALLOW_DELTA__
48
49// Move these to ip parameters/constant
50
// The vstartup value at which DML starts checking whether the mode can be supported
52#define __DML_VBA_MIN_VSTARTUP__ 9
53
54// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
55#define __DML_ARB_TO_RET_DELAY__ (7 + 95)
56
// Fudge factor for the min dcfclk calculation
58#define __DML_MIN_DCFCLK_FACTOR__ 1.15
59
60typedef struct {
61 double DPPCLK;
62 double DISPCLK;
63 double PixelClock;
64 double DCFCLKDeepSleep;
65 unsigned int DPPPerPlane;
66 bool ScalerEnabled;
67 double VRatio;
68 double VRatioChroma;
69 enum scan_direction_class SourceScan;
70 unsigned int BlockWidth256BytesY;
71 unsigned int BlockHeight256BytesY;
72 unsigned int BlockWidth256BytesC;
73 unsigned int BlockHeight256BytesC;
74 unsigned int InterlaceEnable;
75 unsigned int NumberOfCursors;
76 unsigned int VBlank;
77 unsigned int HTotal;
78 unsigned int DCCEnable;
79 bool ODMCombineIsEnabled;
80 enum source_format_class SourcePixelFormat;
81 int BytePerPixelY;
82 int BytePerPixelC;
83 bool ProgressiveToInterlaceUnitInOPP;
84} Pipe;
85
86#define BPP_INVALID 0
87#define BPP_BLENDED_PIPE 0xffffffff
88
89static bool CalculateBytePerPixelAnd256BBlockSizes(
90 enum source_format_class SourcePixelFormat,
91 enum dm_swizzle_mode SurfaceTiling,
92 unsigned int *BytePerPixelY,
93 unsigned int *BytePerPixelC,
94 double *BytePerPixelDETY,
95 double *BytePerPixelDETC,
96 unsigned int *BlockHeight256BytesY,
97 unsigned int *BlockHeight256BytesC,
98 unsigned int *BlockWidth256BytesY,
99 unsigned int *BlockWidth256BytesC);
100static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
101static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
102static unsigned int dscceComputeDelay(
103 unsigned int bpc,
104 double BPP,
105 unsigned int sliceWidth,
106 unsigned int numSlices,
107 enum output_format_class pixelFormat,
108 enum output_encoder_class Output);
109static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
110static bool CalculatePrefetchSchedule(
111 struct display_mode_lib *mode_lib,
112 double HostVMInefficiencyFactor,
113 Pipe *myPipe,
114 unsigned int DSCDelay,
115 double DPPCLKDelaySubtotalPlusCNVCFormater,
116 double DPPCLKDelaySCL,
117 double DPPCLKDelaySCLLBOnly,
118 double DPPCLKDelayCNVCCursor,
119 double DISPCLKDelaySubtotal,
120 unsigned int DPP_RECOUT_WIDTH,
121 enum output_format_class OutputFormat,
122 unsigned int MaxInterDCNTileRepeaters,
123 unsigned int VStartup,
124 unsigned int MaxVStartup,
125 unsigned int GPUVMPageTableLevels,
126 bool GPUVMEnable,
127 bool HostVMEnable,
128 unsigned int HostVMMaxNonCachedPageTableLevels,
129 double HostVMMinPageSize,
130 bool DynamicMetadataEnable,
131 bool DynamicMetadataVMEnabled,
132 int DynamicMetadataLinesBeforeActiveRequired,
133 unsigned int DynamicMetadataTransmittedBytes,
134 double UrgentLatency,
135 double UrgentExtraLatency,
136 double TCalc,
137 unsigned int PDEAndMetaPTEBytesFrame,
138 unsigned int MetaRowByte,
139 unsigned int PixelPTEBytesPerRow,
140 double PrefetchSourceLinesY,
141 unsigned int SwathWidthY,
142 double VInitPreFillY,
143 unsigned int MaxNumSwathY,
144 double PrefetchSourceLinesC,
145 unsigned int SwathWidthC,
146 double VInitPreFillC,
147 unsigned int MaxNumSwathC,
148 int swath_width_luma_ub,
149 int swath_width_chroma_ub,
150 unsigned int SwathHeightY,
151 unsigned int SwathHeightC,
152 double TWait,
153 double *DSTXAfterScaler,
154 double *DSTYAfterScaler,
155 double *DestinationLinesForPrefetch,
156 double *PrefetchBandwidth,
157 double *DestinationLinesToRequestVMInVBlank,
158 double *DestinationLinesToRequestRowInVBlank,
159 double *VRatioPrefetchY,
160 double *VRatioPrefetchC,
161 double *RequiredPrefetchPixDataBWLuma,
162 double *RequiredPrefetchPixDataBWChroma,
163 bool *NotEnoughTimeForDynamicMetadata,
164 double *Tno_bw,
165 double *prefetch_vmrow_bw,
166 double *Tdmdl_vm,
167 double *Tdmdl,
168 double *TSetup,
169 int *VUpdateOffsetPix,
170 double *VUpdateWidthPix,
171 double *VReadyOffsetPix);
172static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
173static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
174static void CalculateDCCConfiguration(
175 bool DCCEnabled,
176 bool DCCProgrammingAssumesScanDirectionUnknown,
177 enum source_format_class SourcePixelFormat,
178 unsigned int SurfaceWidthLuma,
179 unsigned int SurfaceWidthChroma,
180 unsigned int SurfaceHeightLuma,
181 unsigned int SurfaceHeightChroma,
182 double DETBufferSize,
183 unsigned int RequestHeight256ByteLuma,
184 unsigned int RequestHeight256ByteChroma,
185 enum dm_swizzle_mode TilingFormat,
186 unsigned int BytePerPixelY,
187 unsigned int BytePerPixelC,
188 double BytePerPixelDETY,
189 double BytePerPixelDETC,
190 enum scan_direction_class ScanOrientation,
191 unsigned int *MaxUncompressedBlockLuma,
192 unsigned int *MaxUncompressedBlockChroma,
193 unsigned int *MaxCompressedBlockLuma,
194 unsigned int *MaxCompressedBlockChroma,
195 unsigned int *IndependentBlockLuma,
196 unsigned int *IndependentBlockChroma);
197static double CalculatePrefetchSourceLines(
198 struct display_mode_lib *mode_lib,
199 double VRatio,
200 double vtaps,
201 bool Interlace,
202 bool ProgressiveToInterlaceUnitInOPP,
203 unsigned int SwathHeight,
204 unsigned int ViewportYStart,
205 double *VInitPreFill,
206 unsigned int *MaxNumSwath);
207static unsigned int CalculateVMAndRowBytes(
208 struct display_mode_lib *mode_lib,
209 bool DCCEnable,
210 unsigned int BlockHeight256Bytes,
211 unsigned int BlockWidth256Bytes,
212 enum source_format_class SourcePixelFormat,
213 unsigned int SurfaceTiling,
214 unsigned int BytePerPixel,
215 enum scan_direction_class ScanDirection,
216 unsigned int SwathWidth,
217 unsigned int ViewportHeight,
218 bool GPUVMEnable,
219 bool HostVMEnable,
220 unsigned int HostVMMaxNonCachedPageTableLevels,
221 unsigned int GPUVMMinPageSize,
222 unsigned int HostVMMinPageSize,
223 unsigned int PTEBufferSizeInRequests,
224 unsigned int Pitch,
225 unsigned int DCCMetaPitch,
226 unsigned int *MacroTileWidth,
227 unsigned int *MetaRowByte,
228 unsigned int *PixelPTEBytesPerRow,
229 bool *PTEBufferSizeNotExceeded,
230 int *dpte_row_width_ub,
231 unsigned int *dpte_row_height,
232 unsigned int *MetaRequestWidth,
233 unsigned int *MetaRequestHeight,
234 unsigned int *meta_row_width,
235 unsigned int *meta_row_height,
236 int *vm_group_bytes,
237 unsigned int *dpte_group_bytes,
238 unsigned int *PixelPTEReqWidth,
239 unsigned int *PixelPTEReqHeight,
240 unsigned int *PTERequestSize,
241 int *DPDE0BytesFrame,
242 int *MetaPTEBytesFrame);
243static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
244static void CalculateRowBandwidth(
245 bool GPUVMEnable,
246 enum source_format_class SourcePixelFormat,
247 double VRatio,
248 double VRatioChroma,
249 bool DCCEnable,
250 double LineTime,
251 unsigned int MetaRowByteLuma,
252 unsigned int MetaRowByteChroma,
253 unsigned int meta_row_height_luma,
254 unsigned int meta_row_height_chroma,
255 unsigned int PixelPTEBytesPerRowLuma,
256 unsigned int PixelPTEBytesPerRowChroma,
257 unsigned int dpte_row_height_luma,
258 unsigned int dpte_row_height_chroma,
259 double *meta_row_bw,
260 double *dpte_row_bw);
261
262static void CalculateFlipSchedule(
263 struct display_mode_lib *mode_lib,
264 unsigned int k,
265 double HostVMInefficiencyFactor,
266 double UrgentExtraLatency,
267 double UrgentLatency,
268 double PDEAndMetaPTEBytesPerFrame,
269 double MetaRowBytes,
270 double DPTEBytesPerRow);
271static double CalculateWriteBackDelay(
272 enum source_format_class WritebackPixelFormat,
273 double WritebackHRatio,
274 double WritebackVRatio,
275 unsigned int WritebackVTaps,
276 int WritebackDestinationWidth,
277 int WritebackDestinationHeight,
278 int WritebackSourceHeight,
279 unsigned int HTotal);
280
281static void CalculateVupdateAndDynamicMetadataParameters(
282 int MaxInterDCNTileRepeaters,
283 double DPPCLK,
284 double DISPCLK,
285 double DCFClkDeepSleep,
286 double PixelClock,
287 int HTotal,
288 int VBlank,
289 int DynamicMetadataTransmittedBytes,
290 int DynamicMetadataLinesBeforeActiveRequired,
291 int InterlaceEnable,
292 bool ProgressiveToInterlaceUnitInOPP,
293 double *TSetup,
294 double *Tdmbf,
295 double *Tdmec,
296 double *Tdmsks,
297 int *VUpdateOffsetPix,
298 double *VUpdateWidthPix,
299 double *VReadyOffsetPix);
300
301static void CalculateWatermarksAndDRAMSpeedChangeSupport(
302 struct display_mode_lib *mode_lib,
303 unsigned int PrefetchMode,
304 double DCFCLK,
305 double ReturnBW,
306 double UrgentLatency,
307 double ExtraLatency,
308 double SOCCLK,
309 double DCFCLKDeepSleep,
310 unsigned int DETBufferSizeY[],
311 unsigned int DETBufferSizeC[],
312 unsigned int SwathHeightY[],
313 unsigned int SwathHeightC[],
314 double SwathWidthY[],
315 double SwathWidthC[],
316 unsigned int DPPPerPlane[],
317 double BytePerPixelDETY[],
318 double BytePerPixelDETC[],
319 bool UnboundedRequestEnabled,
320 unsigned int CompressedBufferSizeInkByte,
321 enum clock_change_support *DRAMClockChangeSupport,
322 double *StutterExitWatermark,
323 double *StutterEnterPlusExitWatermark,
324 double *Z8StutterExitWatermark,
325 double *Z8StutterEnterPlusExitWatermark);
326
327static void CalculateDCFCLKDeepSleep(
328 struct display_mode_lib *mode_lib,
329 unsigned int NumberOfActivePlanes,
330 int BytePerPixelY[],
331 int BytePerPixelC[],
332 double VRatio[],
333 double VRatioChroma[],
334 double SwathWidthY[],
335 double SwathWidthC[],
336 unsigned int DPPPerPlane[],
337 double HRatio[],
338 double HRatioChroma[],
339 double PixelClock[],
340 double PSCL_THROUGHPUT[],
341 double PSCL_THROUGHPUT_CHROMA[],
342 double DPPCLK[],
343 double ReadBandwidthLuma[],
344 double ReadBandwidthChroma[],
345 int ReturnBusWidth,
346 double *DCFCLKDeepSleep);
347
348static void CalculateUrgentBurstFactor(
349 int swath_width_luma_ub,
350 int swath_width_chroma_ub,
351 unsigned int SwathHeightY,
352 unsigned int SwathHeightC,
353 double LineTime,
354 double UrgentLatency,
355 double CursorBufferSize,
356 unsigned int CursorWidth,
357 unsigned int CursorBPP,
358 double VRatio,
359 double VRatioC,
360 double BytePerPixelInDETY,
361 double BytePerPixelInDETC,
362 double DETBufferSizeY,
363 double DETBufferSizeC,
364 double *UrgentBurstFactorCursor,
365 double *UrgentBurstFactorLuma,
366 double *UrgentBurstFactorChroma,
367 bool *NotEnoughUrgentLatencyHiding);
368
369static void UseMinimumDCFCLK(
370 struct display_mode_lib *mode_lib,
371 int MaxPrefetchMode,
372 int ReorderingBytes);
373
374static void CalculatePixelDeliveryTimes(
375 unsigned int NumberOfActivePlanes,
376 double VRatio[],
377 double VRatioChroma[],
378 double VRatioPrefetchY[],
379 double VRatioPrefetchC[],
380 unsigned int swath_width_luma_ub[],
381 unsigned int swath_width_chroma_ub[],
382 unsigned int DPPPerPlane[],
383 double HRatio[],
384 double HRatioChroma[],
385 double PixelClock[],
386 double PSCL_THROUGHPUT[],
387 double PSCL_THROUGHPUT_CHROMA[],
388 double DPPCLK[],
389 int BytePerPixelC[],
390 enum scan_direction_class SourceScan[],
391 unsigned int NumberOfCursors[],
392 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
393 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
394 unsigned int BlockWidth256BytesY[],
395 unsigned int BlockHeight256BytesY[],
396 unsigned int BlockWidth256BytesC[],
397 unsigned int BlockHeight256BytesC[],
398 double DisplayPipeLineDeliveryTimeLuma[],
399 double DisplayPipeLineDeliveryTimeChroma[],
400 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
401 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
402 double DisplayPipeRequestDeliveryTimeLuma[],
403 double DisplayPipeRequestDeliveryTimeChroma[],
404 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
405 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
406 double CursorRequestDeliveryTime[],
407 double CursorRequestDeliveryTimePrefetch[]);
408
409static void CalculateMetaAndPTETimes(
410 int NumberOfActivePlanes,
411 bool GPUVMEnable,
412 int MetaChunkSize,
413 int MinMetaChunkSizeBytes,
414 int HTotal[],
415 double VRatio[],
416 double VRatioChroma[],
417 double DestinationLinesToRequestRowInVBlank[],
418 double DestinationLinesToRequestRowInImmediateFlip[],
419 bool DCCEnable[],
420 double PixelClock[],
421 int BytePerPixelY[],
422 int BytePerPixelC[],
423 enum scan_direction_class SourceScan[],
424 int dpte_row_height[],
425 int dpte_row_height_chroma[],
426 int meta_row_width[],
427 int meta_row_width_chroma[],
428 int meta_row_height[],
429 int meta_row_height_chroma[],
430 int meta_req_width[],
431 int meta_req_width_chroma[],
432 int meta_req_height[],
433 int meta_req_height_chroma[],
434 int dpte_group_bytes[],
435 int PTERequestSizeY[],
436 int PTERequestSizeC[],
437 int PixelPTEReqWidthY[],
438 int PixelPTEReqHeightY[],
439 int PixelPTEReqWidthC[],
440 int PixelPTEReqHeightC[],
441 int dpte_row_width_luma_ub[],
442 int dpte_row_width_chroma_ub[],
443 double DST_Y_PER_PTE_ROW_NOM_L[],
444 double DST_Y_PER_PTE_ROW_NOM_C[],
445 double DST_Y_PER_META_ROW_NOM_L[],
446 double DST_Y_PER_META_ROW_NOM_C[],
447 double TimePerMetaChunkNominal[],
448 double TimePerChromaMetaChunkNominal[],
449 double TimePerMetaChunkVBlank[],
450 double TimePerChromaMetaChunkVBlank[],
451 double TimePerMetaChunkFlip[],
452 double TimePerChromaMetaChunkFlip[],
453 double time_per_pte_group_nom_luma[],
454 double time_per_pte_group_vblank_luma[],
455 double time_per_pte_group_flip_luma[],
456 double time_per_pte_group_nom_chroma[],
457 double time_per_pte_group_vblank_chroma[],
458 double time_per_pte_group_flip_chroma[]);
459
460static void CalculateVMGroupAndRequestTimes(
461 unsigned int NumberOfActivePlanes,
462 bool GPUVMEnable,
463 unsigned int GPUVMMaxPageTableLevels,
464 unsigned int HTotal[],
465 int BytePerPixelC[],
466 double DestinationLinesToRequestVMInVBlank[],
467 double DestinationLinesToRequestVMInImmediateFlip[],
468 bool DCCEnable[],
469 double PixelClock[],
470 int dpte_row_width_luma_ub[],
471 int dpte_row_width_chroma_ub[],
472 int vm_group_bytes[],
473 unsigned int dpde0_bytes_per_frame_ub_l[],
474 unsigned int dpde0_bytes_per_frame_ub_c[],
475 int meta_pte_bytes_per_frame_ub_l[],
476 int meta_pte_bytes_per_frame_ub_c[],
477 double TimePerVMGroupVBlank[],
478 double TimePerVMGroupFlip[],
479 double TimePerVMRequestVBlank[],
480 double TimePerVMRequestFlip[]);
481
482static void CalculateStutterEfficiency(
483 struct display_mode_lib *mode_lib,
484 int CompressedBufferSizeInkByte,
485 bool UnboundedRequestEnabled,
486 int ConfigReturnBufferSizeInKByte,
487 int MetaFIFOSizeInKEntries,
488 int ZeroSizeBufferEntries,
489 int NumberOfActivePlanes,
490 int ROBBufferSizeInKByte,
491 double TotalDataReadBandwidth,
492 double DCFCLK,
493 double ReturnBW,
494 double COMPBUF_RESERVED_SPACE_64B,
495 double COMPBUF_RESERVED_SPACE_ZS,
496 double SRExitTime,
497 double SRExitZ8Time,
498 bool SynchronizedVBlank,
499 double Z8StutterEnterPlusExitWatermark,
500 double StutterEnterPlusExitWatermark,
501 bool ProgressiveToInterlaceUnitInOPP,
502 bool Interlace[],
503 double MinTTUVBlank[],
504 int DPPPerPlane[],
505 unsigned int DETBufferSizeY[],
506 int BytePerPixelY[],
507 double BytePerPixelDETY[],
508 double SwathWidthY[],
509 int SwathHeightY[],
510 int SwathHeightC[],
511 double NetDCCRateLuma[],
512 double NetDCCRateChroma[],
513 double DCCFractionOfZeroSizeRequestsLuma[],
514 double DCCFractionOfZeroSizeRequestsChroma[],
515 int HTotal[],
516 int VTotal[],
517 double PixelClock[],
518 double VRatio[],
519 enum scan_direction_class SourceScan[],
520 int BlockHeight256BytesY[],
521 int BlockWidth256BytesY[],
522 int BlockHeight256BytesC[],
523 int BlockWidth256BytesC[],
524 int DCCYMaxUncompressedBlock[],
525 int DCCCMaxUncompressedBlock[],
526 int VActive[],
527 bool DCCEnable[],
528 bool WritebackEnable[],
529 double ReadBandwidthPlaneLuma[],
530 double ReadBandwidthPlaneChroma[],
531 double meta_row_bw[],
532 double dpte_row_bw[],
533 double *StutterEfficiencyNotIncludingVBlank,
534 double *StutterEfficiency,
535 int *NumberOfStutterBurstsPerFrame,
536 double *Z8StutterEfficiencyNotIncludingVBlank,
537 double *Z8StutterEfficiency,
538 int *Z8NumberOfStutterBurstsPerFrame,
539 double *StutterPeriod);
540
541static void CalculateSwathAndDETConfiguration(
542 bool ForceSingleDPP,
543 int NumberOfActivePlanes,
544 unsigned int DETBufferSizeInKByte,
545 double MaximumSwathWidthLuma[],
546 double MaximumSwathWidthChroma[],
547 enum scan_direction_class SourceScan[],
548 enum source_format_class SourcePixelFormat[],
549 enum dm_swizzle_mode SurfaceTiling[],
550 int ViewportWidth[],
551 int ViewportHeight[],
552 int SurfaceWidthY[],
553 int SurfaceWidthC[],
554 int SurfaceHeightY[],
555 int SurfaceHeightC[],
556 int Read256BytesBlockHeightY[],
557 int Read256BytesBlockHeightC[],
558 int Read256BytesBlockWidthY[],
559 int Read256BytesBlockWidthC[],
560 enum odm_combine_mode ODMCombineEnabled[],
561 int BlendingAndTiming[],
562 int BytePerPixY[],
563 int BytePerPixC[],
564 double BytePerPixDETY[],
565 double BytePerPixDETC[],
566 int HActive[],
567 double HRatio[],
568 double HRatioChroma[],
569 int DPPPerPlane[],
570 int swath_width_luma_ub[],
571 int swath_width_chroma_ub[],
572 double SwathWidth[],
573 double SwathWidthChroma[],
574 int SwathHeightY[],
575 int SwathHeightC[],
576 unsigned int DETBufferSizeY[],
577 unsigned int DETBufferSizeC[],
578 bool ViewportSizeSupportPerPlane[],
579 bool *ViewportSizeSupport);
580static void CalculateSwathWidth(
581 bool ForceSingleDPP,
582 int NumberOfActivePlanes,
583 enum source_format_class SourcePixelFormat[],
584 enum scan_direction_class SourceScan[],
585 int ViewportWidth[],
586 int ViewportHeight[],
587 int SurfaceWidthY[],
588 int SurfaceWidthC[],
589 int SurfaceHeightY[],
590 int SurfaceHeightC[],
591 enum odm_combine_mode ODMCombineEnabled[],
592 int BytePerPixY[],
593 int BytePerPixC[],
594 int Read256BytesBlockHeightY[],
595 int Read256BytesBlockHeightC[],
596 int Read256BytesBlockWidthY[],
597 int Read256BytesBlockWidthC[],
598 int BlendingAndTiming[],
599 int HActive[],
600 double HRatio[],
601 int DPPPerPlane[],
602 double SwathWidthSingleDPPY[],
603 double SwathWidthSingleDPPC[],
604 double SwathWidthY[],
605 double SwathWidthC[],
606 int MaximumSwathHeightY[],
607 int MaximumSwathHeightC[],
608 int swath_width_luma_ub[],
609 int swath_width_chroma_ub[]);
610
611static double CalculateExtraLatency(
612 int RoundTripPingLatencyCycles,
613 int ReorderingBytes,
614 double DCFCLK,
615 int TotalNumberOfActiveDPP,
616 int PixelChunkSizeInKByte,
617 int TotalNumberOfDCCActiveDPP,
618 int MetaChunkSize,
619 double ReturnBW,
620 bool GPUVMEnable,
621 bool HostVMEnable,
622 int NumberOfActivePlanes,
623 int NumberOfDPP[],
624 int dpte_group_bytes[],
625 double HostVMInefficiencyFactor,
626 double HostVMMinPageSize,
627 int HostVMMaxNonCachedPageTableLevels);
628
629static double CalculateExtraLatencyBytes(
630 int ReorderingBytes,
631 int TotalNumberOfActiveDPP,
632 int PixelChunkSizeInKByte,
633 int TotalNumberOfDCCActiveDPP,
634 int MetaChunkSize,
635 bool GPUVMEnable,
636 bool HostVMEnable,
637 int NumberOfActivePlanes,
638 int NumberOfDPP[],
639 int dpte_group_bytes[],
640 double HostVMInefficiencyFactor,
641 double HostVMMinPageSize,
642 int HostVMMaxNonCachedPageTableLevels);
643
644static double CalculateUrgentLatency(
645 double UrgentLatencyPixelDataOnly,
646 double UrgentLatencyPixelMixedWithVMData,
647 double UrgentLatencyVMDataOnly,
648 bool DoUrgentLatencyAdjustment,
649 double UrgentLatencyAdjustmentFabricClockComponent,
650 double UrgentLatencyAdjustmentFabricClockReference,
651 double FabricClockSingle);
652
653static void CalculateUnboundedRequestAndCompressedBufferSize(
654 unsigned int DETBufferSizeInKByte,
655 int ConfigReturnBufferSizeInKByte,
656 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
657 int TotalActiveDPP,
658 bool NoChromaPlanes,
659 int MaxNumDPP,
660 int CompressedBufferSegmentSizeInkByteFinal,
661 enum output_encoder_class *Output,
662 bool *UnboundedRequestEnabled,
663 int *CompressedBufferSizeInkByte);
664
665static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
666static unsigned int CalculateMaxVStartup(
667 unsigned int VTotal,
668 unsigned int VActive,
669 unsigned int VBlankNom,
670 unsigned int HTotal,
671 double PixelClock,
672 bool ProgressiveTointerlaceUnitinOPP,
673 bool Interlace,
674 unsigned int VBlankNomDefaultUS,
675 double WritebackDelayTime);
676
/*
 * dml314_recalculate - re-run the DML calculation chain on @mode_lib.
 *
 * The stages are invoked strictly in this order, each one operating on
 * @mode_lib:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * When __DML_VBA_DEBUG__ is defined a trace line is printed before stage 4.
 */
void dml314_recalculate(struct display_mode_lib *mode_lib)
{
	/* Configuration stages. */
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);

#if defined(__DML_VBA_DEBUG__)
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif

	/* Final stage: clocks, prefetch parameters, watermarks and performance. */
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
687
688static unsigned int dscceComputeDelay(
689 unsigned int bpc,
690 double BPP,
691 unsigned int sliceWidth,
692 unsigned int numSlices,
693 enum output_format_class pixelFormat,
694 enum output_encoder_class Output)
695{
696 // valid bpc = source bits per component in the set of {8, 10, 12}
697 // valid bpp = increments of 1/16 of a bit
698 // min = 6/7/8 in N420/N422/444, respectively
699 // max = such that compression is 1:1
700 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
701 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
702 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
703
704 // fixed value
705 unsigned int rcModelSize = 8192;
706
707 // N422/N420 operate at 2 pixels per clock
708 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
709
710 if (pixelFormat == dm_420)
711 pixelsPerClock = 2;
712 else if (pixelFormat == dm_444)
713 pixelsPerClock = 1;
714 else if (pixelFormat == dm_n422)
715 pixelsPerClock = 2;
716 // #all other modes operate at 1 pixel per clock
717 else
718 pixelsPerClock = 1;
719
720 //initial transmit delay as per PPS
721 initalXmitDelay = dml_round(a: rcModelSize / 2.0 / BPP / pixelsPerClock);
722
723 //compute ssm delay
724 if (bpc == 8)
725 D = 81;
726 else if (bpc == 10)
727 D = 89;
728 else
729 D = 113;
730
731 //divide by pixel per cycle to compute slice width as seen by DSC
732 w = sliceWidth / pixelsPerClock;
733
734 //422 mode has an additional cycle of delay
735 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
736 s = 0;
737 else
738 s = 1;
739
740 //main calculation for the dscce
741 ix = initalXmitDelay + 45;
742 wx = (w + 2) / 3;
743 P = 3 * wx - w;
744 l0 = ix / w;
745 a = ix + P * l0;
746 ax = (a + 2) / 3 + D + 6 + 1;
747 L = (ax + wx - 1) / wx;
748 if ((ix % w) == 0 && P != 0)
749 lstall = 1;
750 else
751 lstall = 0;
752 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
753
754 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
755 pixels = Delay * 3 * pixelsPerClock;
756 return pixels;
757}
758
759static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
760{
761 unsigned int Delay = 0;
762
763 if (pixelFormat == dm_420) {
764 // sfr
765 Delay = Delay + 2;
766 // dsccif
767 Delay = Delay + 0;
768 // dscc - input deserializer
769 Delay = Delay + 3;
770 // dscc gets pixels every other cycle
771 Delay = Delay + 2;
772 // dscc - input cdc fifo
773 Delay = Delay + 12;
774 // dscc gets pixels every other cycle
775 Delay = Delay + 13;
776 // dscc - cdc uncertainty
777 Delay = Delay + 2;
778 // dscc - output cdc fifo
779 Delay = Delay + 7;
780 // dscc gets pixels every other cycle
781 Delay = Delay + 3;
782 // dscc - cdc uncertainty
783 Delay = Delay + 2;
784 // dscc - output serializer
785 Delay = Delay + 1;
786 // sft
787 Delay = Delay + 1;
788 } else if (pixelFormat == dm_n422) {
789 // sfr
790 Delay = Delay + 2;
791 // dsccif
792 Delay = Delay + 1;
793 // dscc - input deserializer
794 Delay = Delay + 5;
795 // dscc - input cdc fifo
796 Delay = Delay + 25;
797 // dscc - cdc uncertainty
798 Delay = Delay + 2;
799 // dscc - output cdc fifo
800 Delay = Delay + 10;
801 // dscc - cdc uncertainty
802 Delay = Delay + 2;
803 // dscc - output serializer
804 Delay = Delay + 1;
805 // sft
806 Delay = Delay + 1;
807 } else {
808 // sfr
809 Delay = Delay + 2;
810 // dsccif
811 Delay = Delay + 0;
812 // dscc - input deserializer
813 Delay = Delay + 3;
814 // dscc - input cdc fifo
815 Delay = Delay + 12;
816 // dscc - cdc uncertainty
817 Delay = Delay + 2;
818 // dscc - output cdc fifo
819 Delay = Delay + 7;
820 // dscc - output serializer
821 Delay = Delay + 1;
822 // dscc - cdc uncertainty
823 Delay = Delay + 2;
824 // sft
825 Delay = Delay + 1;
826 }
827
828 return Delay;
829}
830
831static bool CalculatePrefetchSchedule(
832 struct display_mode_lib *mode_lib,
833 double HostVMInefficiencyFactor,
834 Pipe *myPipe,
835 unsigned int DSCDelay,
836 double DPPCLKDelaySubtotalPlusCNVCFormater,
837 double DPPCLKDelaySCL,
838 double DPPCLKDelaySCLLBOnly,
839 double DPPCLKDelayCNVCCursor,
840 double DISPCLKDelaySubtotal,
841 unsigned int DPP_RECOUT_WIDTH,
842 enum output_format_class OutputFormat,
843 unsigned int MaxInterDCNTileRepeaters,
844 unsigned int VStartup,
845 unsigned int MaxVStartup,
846 unsigned int GPUVMPageTableLevels,
847 bool GPUVMEnable,
848 bool HostVMEnable,
849 unsigned int HostVMMaxNonCachedPageTableLevels,
850 double HostVMMinPageSize,
851 bool DynamicMetadataEnable,
852 bool DynamicMetadataVMEnabled,
853 int DynamicMetadataLinesBeforeActiveRequired,
854 unsigned int DynamicMetadataTransmittedBytes,
855 double UrgentLatency,
856 double UrgentExtraLatency,
857 double TCalc,
858 unsigned int PDEAndMetaPTEBytesFrame,
859 unsigned int MetaRowByte,
860 unsigned int PixelPTEBytesPerRow,
861 double PrefetchSourceLinesY,
862 unsigned int SwathWidthY,
863 double VInitPreFillY,
864 unsigned int MaxNumSwathY,
865 double PrefetchSourceLinesC,
866 unsigned int SwathWidthC,
867 double VInitPreFillC,
868 unsigned int MaxNumSwathC,
869 int swath_width_luma_ub,
870 int swath_width_chroma_ub,
871 unsigned int SwathHeightY,
872 unsigned int SwathHeightC,
873 double TWait,
874 double *DSTXAfterScaler,
875 double *DSTYAfterScaler,
876 double *DestinationLinesForPrefetch,
877 double *PrefetchBandwidth,
878 double *DestinationLinesToRequestVMInVBlank,
879 double *DestinationLinesToRequestRowInVBlank,
880 double *VRatioPrefetchY,
881 double *VRatioPrefetchC,
882 double *RequiredPrefetchPixDataBWLuma,
883 double *RequiredPrefetchPixDataBWChroma,
884 bool *NotEnoughTimeForDynamicMetadata,
885 double *Tno_bw,
886 double *prefetch_vmrow_bw,
887 double *Tdmdl_vm,
888 double *Tdmdl,
889 double *TSetup,
890 int *VUpdateOffsetPix,
891 double *VUpdateWidthPix,
892 double *VReadyOffsetPix)
893{
894 bool MyError = false;
895 unsigned int DPPCycles, DISPCLKCycles;
896 double DSTTotalPixelsAfterScaler;
897 double LineTime;
898 double dst_y_prefetch_equ;
899#ifdef __DML_VBA_DEBUG__
900 double Tsw_oto;
901#endif
902 double prefetch_bw_oto;
903 double prefetch_bw_pr;
904 double Tvm_oto;
905 double Tr0_oto;
906 double Tvm_oto_lines;
907 double Tr0_oto_lines;
908 double dst_y_prefetch_oto;
909 double TimeForFetchingMetaPTE = 0;
910 double TimeForFetchingRowInVBlank = 0;
911 double LinesToRequestPrefetchPixelData = 0;
912 unsigned int HostVMDynamicLevelsTrips;
913 double trip_to_mem;
914 double Tvm_trips;
915 double Tr0_trips;
916 double Tvm_trips_rounded;
917 double Tr0_trips_rounded;
918 double Lsw_oto;
919 double Tpre_rounded;
920 double prefetch_bw_equ;
921 double Tvm_equ;
922 double Tr0_equ;
923 double Tdmbf;
924 double Tdmec;
925 double Tdmsks;
926 double prefetch_sw_bytes;
927 double bytes_pp;
928 double dep_bytes;
929 int max_vratio_pre = 4;
930 double min_Lsw;
931 double Tsw_est1 = 0;
932 double Tsw_est3 = 0;
933 double max_Tsw = 0;
934
935 if (GPUVMEnable == true && HostVMEnable == true) {
936 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
937 } else {
938 HostVMDynamicLevelsTrips = 0;
939 }
940#ifdef __DML_VBA_DEBUG__
941 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
942#endif
943 CalculateVupdateAndDynamicMetadataParameters(
944 MaxInterDCNTileRepeaters,
945 DPPCLK: myPipe->DPPCLK,
946 DISPCLK: myPipe->DISPCLK,
947 DCFClkDeepSleep: myPipe->DCFCLKDeepSleep,
948 PixelClock: myPipe->PixelClock,
949 HTotal: myPipe->HTotal,
950 VBlank: myPipe->VBlank,
951 DynamicMetadataTransmittedBytes,
952 DynamicMetadataLinesBeforeActiveRequired,
953 InterlaceEnable: myPipe->InterlaceEnable,
954 ProgressiveToInterlaceUnitInOPP: myPipe->ProgressiveToInterlaceUnitInOPP,
955 TSetup,
956 Tdmbf: &Tdmbf,
957 Tdmec: &Tdmec,
958 Tdmsks: &Tdmsks,
959 VUpdateOffsetPix,
960 VUpdateWidthPix,
961 VReadyOffsetPix);
962
963 LineTime = myPipe->HTotal / myPipe->PixelClock;
964 trip_to_mem = UrgentLatency;
965 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
966
967#ifdef __DML_VBA_ALLOW_DELTA__
968 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
969#else
970 if (DynamicMetadataVMEnabled == true) {
971#endif
972 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
973 } else {
974 *Tdmdl = TWait + UrgentExtraLatency;
975 }
976
977#ifdef __DML_VBA_ALLOW_DELTA__
978 if (DynamicMetadataEnable == false) {
979 *Tdmdl = 0.0;
980 }
981#endif
982
983 if (DynamicMetadataEnable == true) {
984 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
985 *NotEnoughTimeForDynamicMetadata = true;
986 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
987 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
988 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
989 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
990 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
991 } else {
992 *NotEnoughTimeForDynamicMetadata = false;
993 }
994 } else {
995 *NotEnoughTimeForDynamicMetadata = false;
996 }
997
998 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
999
1000 if (myPipe->ScalerEnabled)
1001 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
1002 else
1003 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
1004
1005 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
1006
1007 DISPCLKCycles = DISPCLKDelaySubtotal;
1008
1009 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
1010 return true;
1011
1012 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
1013
1014#ifdef __DML_VBA_DEBUG__
1015 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
1016 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
1017 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1018 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1019 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1020 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1021 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1022 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1023#endif
1024
1025 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1026
1027 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1028 *DSTYAfterScaler = 1;
1029 else
1030 *DSTYAfterScaler = 0;
1031
1032 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1033 *DSTYAfterScaler = dml_floor(a: DSTTotalPixelsAfterScaler / myPipe->HTotal, granularity: 1);
1034 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1035
1036#ifdef __DML_VBA_DEBUG__
1037 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1038#endif
1039
1040 MyError = false;
1041
1042 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1043 Tvm_trips_rounded = dml_ceil(a: 4.0 * Tvm_trips / LineTime, granularity: 1) / 4 * LineTime;
1044 Tr0_trips_rounded = dml_ceil(a: 4.0 * Tr0_trips / LineTime, granularity: 1) / 4 * LineTime;
1045
1046#ifdef __DML_VBA_ALLOW_DELTA__
1047 if (!myPipe->DCCEnable) {
1048 Tr0_trips = 0.0;
1049 Tr0_trips_rounded = 0.0;
1050 }
1051#endif
1052
1053 if (!GPUVMEnable) {
1054 Tvm_trips = 0.0;
1055 Tvm_trips_rounded = 0.0;
1056 }
1057
1058 if (GPUVMEnable) {
1059 if (GPUVMPageTableLevels >= 3) {
1060 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1061 } else {
1062 *Tno_bw = 0;
1063 }
1064 } else if (!myPipe->DCCEnable) {
1065 *Tno_bw = LineTime;
1066 } else {
1067 *Tno_bw = LineTime / 4;
1068 }
1069
1070 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1071 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1072 else
1073 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1074 /*rev 99*/
1075 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1076 prefetch_bw_pr = dml_min(a: 1, b: myPipe->VRatio) * prefetch_bw_pr;
1077 max_Tsw = dml_max(a: PrefetchSourceLinesY, b: PrefetchSourceLinesC) * LineTime;
1078 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1079 prefetch_bw_oto = dml_max(a: prefetch_bw_pr, b: prefetch_sw_bytes / max_Tsw);
1080
1081 min_Lsw = dml_max(a: 1, b: dml_max(a: PrefetchSourceLinesY, b: PrefetchSourceLinesC) / max_vratio_pre);
1082 Lsw_oto = dml_ceil(a: 4 * dml_max(a: prefetch_sw_bytes / prefetch_bw_oto / LineTime, b: min_Lsw), granularity: 1) / 4;
1083#ifdef __DML_VBA_DEBUG__
1084 Tsw_oto = Lsw_oto * LineTime;
1085#endif
1086
1087
1088#ifdef __DML_VBA_DEBUG__
1089 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1090 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1091 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1092 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1093 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1094 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1095#endif
1096
1097 if (GPUVMEnable == true)
1098 Tvm_oto = dml_max3(a: *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, b: Tvm_trips, c: LineTime / 4.0);
1099 else
1100 Tvm_oto = LineTime / 4.0;
1101
1102 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1103 Tr0_oto = dml_max4(a: (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, b: Tr0_trips, // PREVIOUS_ERROR (missing this term)
1104 c: LineTime - Tvm_oto,
1105 d: LineTime / 4);
1106 } else {
1107 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1108 }
1109
1110#ifdef __DML_VBA_DEBUG__
1111 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1112 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1113 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1114 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1115 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1116 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1117 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1118 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1119 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1120#endif
1121
1122 Tvm_oto_lines = dml_ceil(a: 4.0 * Tvm_oto / LineTime, granularity: 1) / 4.0;
1123 Tr0_oto_lines = dml_ceil(a: 4.0 * Tr0_oto / LineTime, granularity: 1) / 4.0;
1124 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1125 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(a: TWait + TCalc, b: *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1126 dst_y_prefetch_equ = dml_min(a: dst_y_prefetch_equ, b: 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
1127 dst_y_prefetch_equ = dml_floor(a: 4.0 * (dst_y_prefetch_equ + 0.125), granularity: 1) / 4.0;
1128 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1129
1130 dep_bytes = dml_max(a: PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, b: MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1131
1132 if (prefetch_sw_bytes < dep_bytes)
1133 prefetch_sw_bytes = 2 * dep_bytes;
1134
1135 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1136 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1137 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1138 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1139 dml_print("DML: LineTime: %f\n", LineTime);
1140 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1141
1142 dml_print("DML: LineTime: %f\n", LineTime);
1143 dml_print("DML: VStartup: %d\n", VStartup);
1144 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1145 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1146 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1147 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1148 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1149 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1150 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1151 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm);
1152 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl);
1153 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler);
1154 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler);
1155
1156 *PrefetchBandwidth = 0;
1157 *DestinationLinesToRequestVMInVBlank = 0;
1158 *DestinationLinesToRequestRowInVBlank = 0;
1159 *VRatioPrefetchY = 0;
1160 *VRatioPrefetchC = 0;
1161 *RequiredPrefetchPixDataBWLuma = 0;
1162 if (dst_y_prefetch_equ > 1) {
1163 double PrefetchBandwidth1;
1164 double PrefetchBandwidth2;
1165 double PrefetchBandwidth3;
1166 double PrefetchBandwidth4;
1167
1168 if (Tpre_rounded - *Tno_bw > 0) {
1169 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1170 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1171 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1172 } else {
1173 PrefetchBandwidth1 = 0;
1174 }
1175
1176 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1177 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1178 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1179 }
1180
1181 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1182 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1183 else
1184 PrefetchBandwidth2 = 0;
1185
1186 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1187 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1188 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1189 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1190 } else {
1191 PrefetchBandwidth3 = 0;
1192 }
1193
1194#ifdef __DML_VBA_DEBUG__
1195 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1196 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1197 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1198#endif
1199 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1200 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1201 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1202 }
1203
1204 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1205 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1206 else
1207 PrefetchBandwidth4 = 0;
1208
1209 {
1210 bool Case1OK;
1211 bool Case2OK;
1212 bool Case3OK;
1213
1214 if (PrefetchBandwidth1 > 0) {
1215 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1216 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1217 Case1OK = true;
1218 } else {
1219 Case1OK = false;
1220 }
1221 } else {
1222 Case1OK = false;
1223 }
1224
1225 if (PrefetchBandwidth2 > 0) {
1226 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1227 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1228 Case2OK = true;
1229 } else {
1230 Case2OK = false;
1231 }
1232 } else {
1233 Case2OK = false;
1234 }
1235
1236 if (PrefetchBandwidth3 > 0) {
1237 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1238 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1239 Case3OK = true;
1240 } else {
1241 Case3OK = false;
1242 }
1243 } else {
1244 Case3OK = false;
1245 }
1246
1247 if (Case1OK) {
1248 prefetch_bw_equ = PrefetchBandwidth1;
1249 } else if (Case2OK) {
1250 prefetch_bw_equ = PrefetchBandwidth2;
1251 } else if (Case3OK) {
1252 prefetch_bw_equ = PrefetchBandwidth3;
1253 } else {
1254 prefetch_bw_equ = PrefetchBandwidth4;
1255 }
1256
1257#ifdef __DML_VBA_DEBUG__
1258 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1259 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1260 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1261 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1262#endif
1263
1264 if (prefetch_bw_equ > 0) {
1265 if (GPUVMEnable == true) {
1266 Tvm_equ = dml_max3(a: *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, b: Tvm_trips, c: LineTime / 4);
1267 } else {
1268 Tvm_equ = LineTime / 4;
1269 }
1270
1271 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1272 Tr0_equ = dml_max4(
1273 a: (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1274 b: Tr0_trips,
1275 c: (LineTime - Tvm_equ) / 2,
1276 d: LineTime / 4);
1277 } else {
1278 Tr0_equ = (LineTime - Tvm_equ) / 2;
1279 }
1280 } else {
1281 Tvm_equ = 0;
1282 Tr0_equ = 0;
1283 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1284 }
1285 }
1286
1287 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1288 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1289 TimeForFetchingMetaPTE = Tvm_oto;
1290 TimeForFetchingRowInVBlank = Tr0_oto;
1291 *PrefetchBandwidth = prefetch_bw_oto;
1292 } else {
1293 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1294 TimeForFetchingMetaPTE = Tvm_equ;
1295 TimeForFetchingRowInVBlank = Tr0_equ;
1296 *PrefetchBandwidth = prefetch_bw_equ;
1297 }
1298
1299 *DestinationLinesToRequestVMInVBlank = dml_ceil(a: 4.0 * TimeForFetchingMetaPTE / LineTime, granularity: 1.0) / 4.0;
1300
1301 *DestinationLinesToRequestRowInVBlank = dml_ceil(a: 4.0 * TimeForFetchingRowInVBlank / LineTime, granularity: 1.0) / 4.0;
1302
1303#ifdef __DML_VBA_ALLOW_DELTA__
1304 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1305 // See note above dated 5/30/2018
1306 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1307 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1308#else
1309 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1310#endif
1311
1312#ifdef __DML_VBA_DEBUG__
1313 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1314 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1315 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1316 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1317 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1318 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1319 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1320#endif
1321
1322 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1323
1324 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1325 *VRatioPrefetchY = dml_max(a: *VRatioPrefetchY, b: 1.0);
1326#ifdef __DML_VBA_DEBUG__
1327 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1328 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1329 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1330#endif
1331 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1332 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1333 *VRatioPrefetchY = dml_max(
1334 a: (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1335 b: (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1336 *VRatioPrefetchY = dml_max(a: *VRatioPrefetchY, b: 1.0);
1337 } else {
1338 MyError = true;
1339 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1340 *VRatioPrefetchY = 0;
1341 }
1342#ifdef __DML_VBA_DEBUG__
1343 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1344 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1345 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1346#endif
1347 }
1348
1349 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1350 *VRatioPrefetchC = dml_max(a: *VRatioPrefetchC, b: 1.0);
1351
1352#ifdef __DML_VBA_DEBUG__
1353 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1354 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1355 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1356#endif
1357 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1358 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1359 *VRatioPrefetchC = dml_max(
1360 a: *VRatioPrefetchC,
1361 b: (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1362 *VRatioPrefetchC = dml_max(a: *VRatioPrefetchC, b: 1.0);
1363 } else {
1364 MyError = true;
1365 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1366 *VRatioPrefetchC = 0;
1367 }
1368#ifdef __DML_VBA_DEBUG__
1369 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1370 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1371 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1372#endif
1373 }
1374
1375#ifdef __DML_VBA_DEBUG__
1376 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1377 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1378 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1379#endif
1380
1381 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1382
1383#ifdef __DML_VBA_DEBUG__
1384 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1385#endif
1386
1387 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1388 / LineTime;
1389 } else {
1390 MyError = true;
1391 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1392 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1393 *VRatioPrefetchY = 0;
1394 *VRatioPrefetchC = 0;
1395 *RequiredPrefetchPixDataBWLuma = 0;
1396 *RequiredPrefetchPixDataBWChroma = 0;
1397 }
1398
1399 dml_print(
1400 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1401 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1402 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1403 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1404 dml_print(
1405 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1406 (double) LinesToRequestPrefetchPixelData * LineTime);
1407 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
1408 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1409 dml_print(
1410 "DML: Tslack(pre): %fus - time left over in schedule\n",
1411 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1412 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1413 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1414
1415 } else {
1416 MyError = true;
1417 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1418 }
1419
1420 {
1421 double prefetch_vm_bw;
1422 double prefetch_row_bw;
1423
1424 if (PDEAndMetaPTEBytesFrame == 0) {
1425 prefetch_vm_bw = 0;
1426 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1427#ifdef __DML_VBA_DEBUG__
1428 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1429 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1430 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1431 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1432#endif
1433 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1434#ifdef __DML_VBA_DEBUG__
1435 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1436#endif
1437 } else {
1438 prefetch_vm_bw = 0;
1439 MyError = true;
1440 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1441 }
1442
1443 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1444 prefetch_row_bw = 0;
1445 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1446 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1447
1448#ifdef __DML_VBA_DEBUG__
1449 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1450 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1451 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1452 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1453#endif
1454 } else {
1455 prefetch_row_bw = 0;
1456 MyError = true;
1457 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1458 }
1459
1460 *prefetch_vmrow_bw = dml_max(a: prefetch_vm_bw, b: prefetch_row_bw);
1461 }
1462
1463 if (MyError) {
1464 *PrefetchBandwidth = 0;
1465 *DestinationLinesToRequestVMInVBlank = 0;
1466 *DestinationLinesToRequestRowInVBlank = 0;
1467 *DestinationLinesForPrefetch = 0;
1468 *VRatioPrefetchY = 0;
1469 *VRatioPrefetchC = 0;
1470 *RequiredPrefetchPixDataBWLuma = 0;
1471 *RequiredPrefetchPixDataBWChroma = 0;
1472 }
1473
1474 return MyError;
1475}
1476
/*
 * RoundToDFSGranularityUp - snap a clock onto the (4 * VCOSpeed) / N grid.
 *
 * Clock:    clock value to be rounded.
 * VCOSpeed: VCO speed; presumably expressed in the same unit as Clock, since
 *           their ratio is used as a plain divider count -- TODO confirm.
 *
 * Returns 4 * VCOSpeed divided by a divider N, where N is
 * (4 * VCOSpeed / Clock) passed through dml_floor() with a granularity of 1.
 * Assuming dml_floor() rounds towards the lower whole number, N never grows,
 * so the result is never below Clock.
 *
 * NOTE(review): there is no guard for Clock == 0, nor for N becoming 0
 * (which would happen for Clock > 4 * VCOSpeed if dml_floor() yields 0).
 * The expression is deliberately left exactly as delivered; see the HW note
 * at the top of this file.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1);
}
1481
/*
 * RoundToDFSGranularityDown - snap a clock onto the (4 * VCOSpeed) / N grid.
 *
 * Clock:    clock value to be rounded.
 * VCOSpeed: VCO speed; presumably expressed in the same unit as Clock, since
 *           their ratio is used as a plain divider count -- TODO confirm.
 *
 * Returns 4 * VCOSpeed divided by a divider N, where N is
 * (4 * VCOSpeed / Clock) passed through dml_ceil() with a granularity of 1.
 * Assuming dml_ceil() rounds towards the higher whole number, N never
 * shrinks, so the result is never above Clock.
 *
 * NOTE(review): there is no guard for Clock == 0. The expression is
 * deliberately left exactly as delivered; see the HW note at the top of this
 * file.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1);
}
1486
1487static void CalculateDCCConfiguration(
1488 bool DCCEnabled,
1489 bool DCCProgrammingAssumesScanDirectionUnknown,
1490 enum source_format_class SourcePixelFormat,
1491 unsigned int SurfaceWidthLuma,
1492 unsigned int SurfaceWidthChroma,
1493 unsigned int SurfaceHeightLuma,
1494 unsigned int SurfaceHeightChroma,
1495 double DETBufferSize,
1496 unsigned int RequestHeight256ByteLuma,
1497 unsigned int RequestHeight256ByteChroma,
1498 enum dm_swizzle_mode TilingFormat,
1499 unsigned int BytePerPixelY,
1500 unsigned int BytePerPixelC,
1501 double BytePerPixelDETY,
1502 double BytePerPixelDETC,
1503 enum scan_direction_class ScanOrientation,
1504 unsigned int *MaxUncompressedBlockLuma,
1505 unsigned int *MaxUncompressedBlockChroma,
1506 unsigned int *MaxCompressedBlockLuma,
1507 unsigned int *MaxCompressedBlockChroma,
1508 unsigned int *IndependentBlockLuma,
1509 unsigned int *IndependentBlockChroma)
1510{
1511 int yuv420;
1512 int horz_div_l;
1513 int horz_div_c;
1514 int vert_div_l;
1515 int vert_div_c;
1516
1517 int swath_buf_size;
1518 double detile_buf_vp_horz_limit;
1519 double detile_buf_vp_vert_limit;
1520
1521 int MAS_vp_horz_limit;
1522 int MAS_vp_vert_limit;
1523 int max_vp_horz_width;
1524 int max_vp_vert_height;
1525 int eff_surf_width_l;
1526 int eff_surf_width_c;
1527 int eff_surf_height_l;
1528 int eff_surf_height_c;
1529
1530 int full_swath_bytes_horz_wc_l;
1531 int full_swath_bytes_horz_wc_c;
1532 int full_swath_bytes_vert_wc_l;
1533 int full_swath_bytes_vert_wc_c;
1534 int req128_horz_wc_l;
1535 int req128_horz_wc_c;
1536 int req128_vert_wc_l;
1537 int req128_vert_wc_c;
1538 int segment_order_horz_contiguous_luma;
1539 int segment_order_horz_contiguous_chroma;
1540 int segment_order_vert_contiguous_luma;
1541 int segment_order_vert_contiguous_chroma;
1542
1543 typedef enum {
1544 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1545 } RequestType;
1546 RequestType RequestLuma;
1547 RequestType RequestChroma;
1548
1549 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1550 horz_div_l = 1;
1551 horz_div_c = 1;
1552 vert_div_l = 1;
1553 vert_div_c = 1;
1554
1555 if (BytePerPixelY == 1)
1556 vert_div_l = 0;
1557 if (BytePerPixelC == 1)
1558 vert_div_c = 0;
1559 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1560 horz_div_l = 0;
1561 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1562 horz_div_c = 0;
1563
1564 if (BytePerPixelC == 0) {
1565 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1566 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1567 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1568 } else {
1569 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1570 detile_buf_vp_horz_limit = (double) swath_buf_size
1571 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1572 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1573 detile_buf_vp_vert_limit = (double) swath_buf_size
1574 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1575 }
1576
1577 if (SourcePixelFormat == dm_420_10) {
1578 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1579 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1580 }
1581
1582 detile_buf_vp_horz_limit = dml_floor(a: detile_buf_vp_horz_limit - 1, granularity: 16);
1583 detile_buf_vp_vert_limit = dml_floor(a: detile_buf_vp_vert_limit - 1, granularity: 16);
1584
1585 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1586 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1587 max_vp_horz_width = dml_min(a: (double) MAS_vp_horz_limit, b: detile_buf_vp_horz_limit);
1588 max_vp_vert_height = dml_min(a: (double) MAS_vp_vert_limit, b: detile_buf_vp_vert_limit);
1589 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1590 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1591 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1592 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1593
1594 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1595 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1596 if (BytePerPixelC > 0) {
1597 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1598 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1599 } else {
1600 full_swath_bytes_horz_wc_c = 0;
1601 full_swath_bytes_vert_wc_c = 0;
1602 }
1603
1604 if (SourcePixelFormat == dm_420_10) {
1605 full_swath_bytes_horz_wc_l = dml_ceil(a: full_swath_bytes_horz_wc_l * 2 / 3, granularity: 256);
1606 full_swath_bytes_horz_wc_c = dml_ceil(a: full_swath_bytes_horz_wc_c * 2 / 3, granularity: 256);
1607 full_swath_bytes_vert_wc_l = dml_ceil(a: full_swath_bytes_vert_wc_l * 2 / 3, granularity: 256);
1608 full_swath_bytes_vert_wc_c = dml_ceil(a: full_swath_bytes_vert_wc_c * 2 / 3, granularity: 256);
1609 }
1610
1611 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1612 req128_horz_wc_l = 0;
1613 req128_horz_wc_c = 0;
1614 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1615 req128_horz_wc_l = 0;
1616 req128_horz_wc_c = 1;
1617 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1618 req128_horz_wc_l = 1;
1619 req128_horz_wc_c = 0;
1620 } else {
1621 req128_horz_wc_l = 1;
1622 req128_horz_wc_c = 1;
1623 }
1624
1625 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1626 req128_vert_wc_l = 0;
1627 req128_vert_wc_c = 0;
1628 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1629 req128_vert_wc_l = 0;
1630 req128_vert_wc_c = 1;
1631 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1632 req128_vert_wc_l = 1;
1633 req128_vert_wc_c = 0;
1634 } else {
1635 req128_vert_wc_l = 1;
1636 req128_vert_wc_c = 1;
1637 }
1638
1639 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1640 segment_order_horz_contiguous_luma = 0;
1641 } else {
1642 segment_order_horz_contiguous_luma = 1;
1643 }
1644 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1645 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1646 segment_order_vert_contiguous_luma = 0;
1647 } else {
1648 segment_order_vert_contiguous_luma = 1;
1649 }
1650 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1651 segment_order_horz_contiguous_chroma = 0;
1652 } else {
1653 segment_order_horz_contiguous_chroma = 1;
1654 }
1655 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1656 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1657 segment_order_vert_contiguous_chroma = 0;
1658 } else {
1659 segment_order_vert_contiguous_chroma = 1;
1660 }
1661
1662 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1663 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1664 RequestLuma = REQ_256Bytes;
1665 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1666 RequestLuma = REQ_128BytesNonContiguous;
1667 } else {
1668 RequestLuma = REQ_128BytesContiguous;
1669 }
1670 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1671 RequestChroma = REQ_256Bytes;
1672 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1673 RequestChroma = REQ_128BytesNonContiguous;
1674 } else {
1675 RequestChroma = REQ_128BytesContiguous;
1676 }
1677 } else if (ScanOrientation != dm_vert) {
1678 if (req128_horz_wc_l == 0) {
1679 RequestLuma = REQ_256Bytes;
1680 } else if (segment_order_horz_contiguous_luma == 0) {
1681 RequestLuma = REQ_128BytesNonContiguous;
1682 } else {
1683 RequestLuma = REQ_128BytesContiguous;
1684 }
1685 if (req128_horz_wc_c == 0) {
1686 RequestChroma = REQ_256Bytes;
1687 } else if (segment_order_horz_contiguous_chroma == 0) {
1688 RequestChroma = REQ_128BytesNonContiguous;
1689 } else {
1690 RequestChroma = REQ_128BytesContiguous;
1691 }
1692 } else {
1693 if (req128_vert_wc_l == 0) {
1694 RequestLuma = REQ_256Bytes;
1695 } else if (segment_order_vert_contiguous_luma == 0) {
1696 RequestLuma = REQ_128BytesNonContiguous;
1697 } else {
1698 RequestLuma = REQ_128BytesContiguous;
1699 }
1700 if (req128_vert_wc_c == 0) {
1701 RequestChroma = REQ_256Bytes;
1702 } else if (segment_order_vert_contiguous_chroma == 0) {
1703 RequestChroma = REQ_128BytesNonContiguous;
1704 } else {
1705 RequestChroma = REQ_128BytesContiguous;
1706 }
1707 }
1708
1709 if (RequestLuma == REQ_256Bytes) {
1710 *MaxUncompressedBlockLuma = 256;
1711 *MaxCompressedBlockLuma = 256;
1712 *IndependentBlockLuma = 0;
1713 } else if (RequestLuma == REQ_128BytesContiguous) {
1714 *MaxUncompressedBlockLuma = 256;
1715 *MaxCompressedBlockLuma = 128;
1716 *IndependentBlockLuma = 128;
1717 } else {
1718 *MaxUncompressedBlockLuma = 256;
1719 *MaxCompressedBlockLuma = 64;
1720 *IndependentBlockLuma = 64;
1721 }
1722
1723 if (RequestChroma == REQ_256Bytes) {
1724 *MaxUncompressedBlockChroma = 256;
1725 *MaxCompressedBlockChroma = 256;
1726 *IndependentBlockChroma = 0;
1727 } else if (RequestChroma == REQ_128BytesContiguous) {
1728 *MaxUncompressedBlockChroma = 256;
1729 *MaxCompressedBlockChroma = 128;
1730 *IndependentBlockChroma = 128;
1731 } else {
1732 *MaxUncompressedBlockChroma = 256;
1733 *MaxCompressedBlockChroma = 64;
1734 *IndependentBlockChroma = 64;
1735 }
1736
1737 if (DCCEnabled != true || BytePerPixelC == 0) {
1738 *MaxUncompressedBlockChroma = 0;
1739 *MaxCompressedBlockChroma = 0;
1740 *IndependentBlockChroma = 0;
1741 }
1742
1743 if (DCCEnabled != true) {
1744 *MaxUncompressedBlockLuma = 0;
1745 *MaxCompressedBlockLuma = 0;
1746 *IndependentBlockLuma = 0;
1747 }
1748}
1749
1750static double CalculatePrefetchSourceLines(
1751 struct display_mode_lib *mode_lib,
1752 double VRatio,
1753 double vtaps,
1754 bool Interlace,
1755 bool ProgressiveToInterlaceUnitInOPP,
1756 unsigned int SwathHeight,
1757 unsigned int ViewportYStart,
1758 double *VInitPreFill,
1759 unsigned int *MaxNumSwath)
1760{
1761 struct vba_vars_st *v = &mode_lib->vba;
1762 unsigned int MaxPartialSwath;
1763
1764 if (ProgressiveToInterlaceUnitInOPP)
1765 *VInitPreFill = dml_floor(a: (VRatio + vtaps + 1) / 2.0, granularity: 1);
1766 else
1767 *VInitPreFill = dml_floor(a: (VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, granularity: 1);
1768
1769 if (!v->IgnoreViewportPositioning) {
1770
1771 *MaxNumSwath = dml_ceil(a: (*VInitPreFill - 1.0) / SwathHeight, granularity: 1) + 1.0;
1772
1773 if (*VInitPreFill > 1.0)
1774 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1775 else
1776 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1777 MaxPartialSwath = dml_max(a: 1U, b: MaxPartialSwath);
1778
1779 } else {
1780
1781 if (ViewportYStart != 0)
1782 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1783
1784 *MaxNumSwath = dml_ceil(a: *VInitPreFill / SwathHeight, granularity: 1);
1785
1786 if (*VInitPreFill > 1.0)
1787 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1788 else
1789 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1790 }
1791
1792#ifdef __DML_VBA_DEBUG__
1793 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1794 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1795 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1796 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1797 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1798 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1799 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1800 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1801 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1802#endif
1803 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1804}
1805
1806static unsigned int CalculateVMAndRowBytes(
1807 struct display_mode_lib *mode_lib,
1808 bool DCCEnable,
1809 unsigned int BlockHeight256Bytes,
1810 unsigned int BlockWidth256Bytes,
1811 enum source_format_class SourcePixelFormat,
1812 unsigned int SurfaceTiling,
1813 unsigned int BytePerPixel,
1814 enum scan_direction_class ScanDirection,
1815 unsigned int SwathWidth,
1816 unsigned int ViewportHeight,
1817 bool GPUVMEnable,
1818 bool HostVMEnable,
1819 unsigned int HostVMMaxNonCachedPageTableLevels,
1820 unsigned int GPUVMMinPageSize,
1821 unsigned int HostVMMinPageSize,
1822 unsigned int PTEBufferSizeInRequests,
1823 unsigned int Pitch,
1824 unsigned int DCCMetaPitch,
1825 unsigned int *MacroTileWidth,
1826 unsigned int *MetaRowByte,
1827 unsigned int *PixelPTEBytesPerRow,
1828 bool *PTEBufferSizeNotExceeded,
1829 int *dpte_row_width_ub,
1830 unsigned int *dpte_row_height,
1831 unsigned int *MetaRequestWidth,
1832 unsigned int *MetaRequestHeight,
1833 unsigned int *meta_row_width,
1834 unsigned int *meta_row_height,
1835 int *vm_group_bytes,
1836 unsigned int *dpte_group_bytes,
1837 unsigned int *PixelPTEReqWidth,
1838 unsigned int *PixelPTEReqHeight,
1839 unsigned int *PTERequestSize,
1840 int *DPDE0BytesFrame,
1841 int *MetaPTEBytesFrame)
1842{
1843 struct vba_vars_st *v = &mode_lib->vba;
1844 unsigned int MPDEBytesFrame;
1845 unsigned int DCCMetaSurfaceBytes;
1846 unsigned int MacroTileSizeBytes;
1847 unsigned int MacroTileHeight;
1848 unsigned int ExtraDPDEBytesFrame;
1849 unsigned int PDEAndMetaPTEBytesFrame;
1850 unsigned int PixelPTEReqHeightPTEs = 0;
1851 unsigned int HostVMDynamicLevels = 0;
1852 double FractionOfPTEReturnDrop;
1853
1854 if (GPUVMEnable == true && HostVMEnable == true) {
1855 if (HostVMMinPageSize < 2048) {
1856 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1857 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1858 HostVMDynamicLevels = dml_max(a: 0, b: (int) HostVMMaxNonCachedPageTableLevels - 1);
1859 } else {
1860 HostVMDynamicLevels = dml_max(a: 0, b: (int) HostVMMaxNonCachedPageTableLevels - 2);
1861 }
1862 }
1863
1864 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1865 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1866 if (ScanDirection != dm_vert) {
1867 *meta_row_height = *MetaRequestHeight;
1868 *meta_row_width = dml_ceil(a: (double) SwathWidth - 1, granularity: *MetaRequestWidth) + *MetaRequestWidth;
1869 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1870 } else {
1871 *meta_row_height = *MetaRequestWidth;
1872 *meta_row_width = dml_ceil(a: (double) SwathWidth - 1, granularity: *MetaRequestHeight) + *MetaRequestHeight;
1873 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1874 }
1875 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(a: ViewportHeight - 1, granularity: 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1876 if (GPUVMEnable == true) {
1877 *MetaPTEBytesFrame = (dml_ceil(a: (double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), granularity: 1) + 1) * 64;
1878 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1879 } else {
1880 *MetaPTEBytesFrame = 0;
1881 MPDEBytesFrame = 0;
1882 }
1883
1884 if (DCCEnable != true) {
1885 *MetaPTEBytesFrame = 0;
1886 MPDEBytesFrame = 0;
1887 *MetaRowByte = 0;
1888 }
1889
1890 if (SurfaceTiling == dm_sw_linear) {
1891 MacroTileSizeBytes = 256;
1892 MacroTileHeight = BlockHeight256Bytes;
1893 } else {
1894 MacroTileSizeBytes = 65536;
1895 MacroTileHeight = 16 * BlockHeight256Bytes;
1896 }
1897 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1898
1899 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1900 if (ScanDirection != dm_vert) {
1901 *DPDE0BytesFrame = 64
1902 * (dml_ceil(
1903 a: ((Pitch * (dml_ceil(a: ViewportHeight - 1, granularity: MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1904 / (8 * 2097152),
1905 granularity: 1) + 1);
1906 } else {
1907 *DPDE0BytesFrame = 64
1908 * (dml_ceil(
1909 a: ((Pitch * (dml_ceil(a: (double) SwathWidth - 1, granularity: MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1910 / (8 * 2097152),
1911 granularity: 1) + 1);
1912 }
1913 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1914 } else {
1915 *DPDE0BytesFrame = 0;
1916 ExtraDPDEBytesFrame = 0;
1917 }
1918
1919 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1920
1921#ifdef __DML_VBA_DEBUG__
1922 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1923 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1924 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1925 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1926 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1927#endif
1928
1929 if (HostVMEnable == true) {
1930 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1931 }
1932#ifdef __DML_VBA_DEBUG__
1933 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1934#endif
1935
1936 if (SurfaceTiling == dm_sw_linear) {
1937 PixelPTEReqHeightPTEs = 1;
1938 *PixelPTEReqHeight = 1;
1939 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1940 *PTERequestSize = 64;
1941 FractionOfPTEReturnDrop = 0;
1942 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1943 PixelPTEReqHeightPTEs = 16;
1944 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1945 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1946 *PTERequestSize = 128;
1947 FractionOfPTEReturnDrop = 0;
1948 } else {
1949 PixelPTEReqHeightPTEs = 1;
1950 *PixelPTEReqHeight = MacroTileHeight;
1951 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1952 *PTERequestSize = 64;
1953 FractionOfPTEReturnDrop = 0;
1954 }
1955
1956 if (SurfaceTiling == dm_sw_linear) {
1957 *dpte_row_height = dml_min(a: 128, b: 1 << (unsigned int) dml_floor(a: dml_log2(x: PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), granularity: 1));
1958 *dpte_row_width_ub = (dml_ceil(a: (double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, granularity: 1) + 1) * *PixelPTEReqWidth;
1959 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1960 } else if (ScanDirection != dm_vert) {
1961 *dpte_row_height = *PixelPTEReqHeight;
1962 *dpte_row_width_ub = (dml_ceil(a: (double) (SwathWidth - 1) / *PixelPTEReqWidth, granularity: 1) + 1) * *PixelPTEReqWidth;
1963 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1964 } else {
1965 *dpte_row_height = dml_min(a: *PixelPTEReqWidth, b: *MacroTileWidth);
1966 *dpte_row_width_ub = (dml_ceil(a: (double) (SwathWidth - 1) / *PixelPTEReqHeight, granularity: 1) + 1) * *PixelPTEReqHeight;
1967 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1968 }
1969
1970 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1971 *PTEBufferSizeNotExceeded = true;
1972 } else {
1973 *PTEBufferSizeNotExceeded = false;
1974 }
1975
1976 if (GPUVMEnable != true) {
1977 *PixelPTEBytesPerRow = 0;
1978 *PTEBufferSizeNotExceeded = true;
1979 }
1980
1981 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1982
1983 if (HostVMEnable == true) {
1984 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1985 }
1986
1987 if (HostVMEnable == true) {
1988 *vm_group_bytes = 512;
1989 *dpte_group_bytes = 512;
1990 } else if (GPUVMEnable == true) {
1991 *vm_group_bytes = 2048;
1992 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1993 *dpte_group_bytes = 512;
1994 } else {
1995 *dpte_group_bytes = 2048;
1996 }
1997 } else {
1998 *vm_group_bytes = 0;
1999 *dpte_group_bytes = 0;
2000 }
2001 return PDEAndMetaPTEBytesFrame;
2002}
2003
2004static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
2005{
2006 struct vba_vars_st *v = &mode_lib->vba;
2007 unsigned int j, k;
2008 double HostVMInefficiencyFactor = 1.0;
2009 bool NoChromaPlanes = true;
2010 int ReorderBytes;
2011 double VMDataOnlyReturnBW;
2012 double MaxTotalRDBandwidth = 0;
2013 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
2014
2015 v->WritebackDISPCLK = 0.0;
2016 v->DISPCLKWithRamping = 0;
2017 v->DISPCLKWithoutRamping = 0;
2018 v->GlobalDPPCLK = 0.0;
2019 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
2020 {
2021 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2022 a: v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2023 b: v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2024 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2025
2026 if (v->HostVMEnable != true) {
2027 v->ReturnBW = dml_min(
2028 a: IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2029 b: IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2030 } else {
2031 v->ReturnBW = dml_min(
2032 a: IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2033 b: IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2034 }
2035 }
2036 /* End DAL custom code */
2037
2038 // DISPCLK and DPPCLK Calculation
2039 //
2040 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2041 if (v->WritebackEnable[k]) {
2042 v->WritebackDISPCLK = dml_max(
2043 a: v->WritebackDISPCLK,
2044 b: dml314_CalculateWriteBackDISPCLK(
2045 WritebackPixelFormat: v->WritebackPixelFormat[k],
2046 PixelClock: v->PixelClock[k],
2047 WritebackHRatio: v->WritebackHRatio[k],
2048 WritebackVRatio: v->WritebackVRatio[k],
2049 WritebackHTaps: v->WritebackHTaps[k],
2050 WritebackVTaps: v->WritebackVTaps[k],
2051 WritebackSourceWidth: v->WritebackSourceWidth[k],
2052 WritebackDestinationWidth: v->WritebackDestinationWidth[k],
2053 HTotal: v->HTotal[k],
2054 WritebackLineBufferSize: v->WritebackLineBufferSize));
2055 }
2056 }
2057
2058 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2059 if (v->HRatio[k] > 1) {
2060 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2061 a: v->MaxDCHUBToPSCLThroughput,
2062 b: v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(a: v->htaps[k] / 6.0, granularity: 1));
2063 } else {
2064 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(a: v->MaxDCHUBToPSCLThroughput, b: v->MaxPSCLToLBThroughput);
2065 }
2066
2067 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2068 * dml_max(
2069 a: v->vtaps[k] / 6.0 * dml_min(a: 1.0, b: v->HRatio[k]),
2070 b: dml_max(a: v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], b: 1.0));
2071
2072 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2073 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2074 }
2075
2076 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2077 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2078 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2079 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2080 } else {
2081 if (v->HRatioChroma[k] > 1) {
2082 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2083 a: v->MaxDCHUBToPSCLThroughput,
2084 b: v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(a: v->HTAPsChroma[k] / 6.0, granularity: 1.0));
2085 } else {
2086 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(a: v->MaxDCHUBToPSCLThroughput, b: v->MaxPSCLToLBThroughput);
2087 }
2088 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2089 * dml_max3(
2090 a: v->VTAPsChroma[k] / 6.0 * dml_min(a: 1.0, b: v->HRatioChroma[k]),
2091 b: v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2092 c: 1.0);
2093
2094 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2095 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2096 }
2097
2098 v->DPPCLKUsingSingleDPP[k] = dml_max(a: v->DPPCLKUsingSingleDPPLuma, b: v->DPPCLKUsingSingleDPPChroma);
2099 }
2100 }
2101
2102 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2103 if (v->BlendingAndTiming[k] != k)
2104 continue;
2105 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2106 v->DISPCLKWithRamping = dml_max(
2107 a: v->DISPCLKWithRamping,
2108 b: v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2109 * (1 + v->DISPCLKRampingMargin / 100));
2110 v->DISPCLKWithoutRamping = dml_max(
2111 a: v->DISPCLKWithoutRamping,
2112 b: v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2113 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2114 v->DISPCLKWithRamping = dml_max(
2115 a: v->DISPCLKWithRamping,
2116 b: v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2117 * (1 + v->DISPCLKRampingMargin / 100));
2118 v->DISPCLKWithoutRamping = dml_max(
2119 a: v->DISPCLKWithoutRamping,
2120 b: v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2121 } else {
2122 v->DISPCLKWithRamping = dml_max(
2123 a: v->DISPCLKWithRamping,
2124 b: v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2125 v->DISPCLKWithoutRamping = dml_max(
2126 a: v->DISPCLKWithoutRamping,
2127 b: v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2128 }
2129 }
2130
2131 v->DISPCLKWithRamping = dml_max(a: v->DISPCLKWithRamping, b: v->WritebackDISPCLK);
2132 v->DISPCLKWithoutRamping = dml_max(a: v->DISPCLKWithoutRamping, b: v->WritebackDISPCLK);
2133
2134 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2135 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(Clock: v->DISPCLKWithRamping, VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
2136 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(Clock: v->DISPCLKWithoutRamping, VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
2137 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2138 Clock: v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2139 VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
2140 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2141 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2142 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2143 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2144 } else {
2145 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2146 }
2147 v->DISPCLK = v->DISPCLK_calculated;
2148 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2149
2150 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2151 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2152 v->GlobalDPPCLK = dml_max(a: v->GlobalDPPCLK, b: v->DPPCLK_calculated[k]);
2153 }
2154 v->GlobalDPPCLK = RoundToDFSGranularityUp(Clock: v->GlobalDPPCLK, VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
2155 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2156 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(a: v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, granularity: 1);
2157 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2158 }
2159
2160 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2161 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2162 }
2163
2164 // Urgent and B P-State/DRAM Clock Change Watermark
2165 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2166 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2167
2168 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2169 CalculateBytePerPixelAnd256BBlockSizes(
2170 SourcePixelFormat: v->SourcePixelFormat[k],
2171 SurfaceTiling: v->SurfaceTiling[k],
2172 BytePerPixelY: &v->BytePerPixelY[k],
2173 BytePerPixelC: &v->BytePerPixelC[k],
2174 BytePerPixelDETY: &v->BytePerPixelDETY[k],
2175 BytePerPixelDETC: &v->BytePerPixelDETC[k],
2176 BlockHeight256BytesY: &v->BlockHeight256BytesY[k],
2177 BlockHeight256BytesC: &v->BlockHeight256BytesC[k],
2178 BlockWidth256BytesY: &v->BlockWidth256BytesY[k],
2179 BlockWidth256BytesC: &v->BlockWidth256BytesC[k]);
2180 }
2181
2182 CalculateSwathWidth(
2183 ForceSingleDPP: false,
2184 NumberOfActivePlanes: v->NumberOfActivePlanes,
2185 SourcePixelFormat: v->SourcePixelFormat,
2186 SourceScan: v->SourceScan,
2187 ViewportWidth: v->ViewportWidth,
2188 ViewportHeight: v->ViewportHeight,
2189 SurfaceWidthY: v->SurfaceWidthY,
2190 SurfaceWidthC: v->SurfaceWidthC,
2191 SurfaceHeightY: v->SurfaceHeightY,
2192 SurfaceHeightC: v->SurfaceHeightC,
2193 ODMCombineEnabled: v->ODMCombineEnabled,
2194 BytePerPixY: v->BytePerPixelY,
2195 BytePerPixC: v->BytePerPixelC,
2196 Read256BytesBlockHeightY: v->BlockHeight256BytesY,
2197 Read256BytesBlockHeightC: v->BlockHeight256BytesC,
2198 Read256BytesBlockWidthY: v->BlockWidth256BytesY,
2199 Read256BytesBlockWidthC: v->BlockWidth256BytesC,
2200 BlendingAndTiming: v->BlendingAndTiming,
2201 HActive: v->HActive,
2202 HRatio: v->HRatio,
2203 DPPPerPlane: v->DPPPerPlane,
2204 SwathWidthSingleDPPY: v->SwathWidthSingleDPPY,
2205 SwathWidthSingleDPPC: v->SwathWidthSingleDPPC,
2206 SwathWidthY: v->SwathWidthY,
2207 SwathWidthC: v->SwathWidthC,
2208 MaximumSwathHeightY: v->dummyinteger3,
2209 MaximumSwathHeightC: v->dummyinteger4,
2210 swath_width_luma_ub: v->swath_width_luma_ub,
2211 swath_width_chroma_ub: v->swath_width_chroma_ub);
2212
2213 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2214 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2215 * v->VRatio[k];
2216 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2217 * v->VRatioChroma[k];
2218 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2219 }
2220
2221 // DCFCLK Deep Sleep
2222 CalculateDCFCLKDeepSleep(
2223 mode_lib,
2224 NumberOfActivePlanes: v->NumberOfActivePlanes,
2225 BytePerPixelY: v->BytePerPixelY,
2226 BytePerPixelC: v->BytePerPixelC,
2227 VRatio: v->VRatio,
2228 VRatioChroma: v->VRatioChroma,
2229 SwathWidthY: v->SwathWidthY,
2230 SwathWidthC: v->SwathWidthC,
2231 DPPPerPlane: v->DPPPerPlane,
2232 HRatio: v->HRatio,
2233 HRatioChroma: v->HRatioChroma,
2234 PixelClock: v->PixelClock,
2235 PSCL_THROUGHPUT: v->PSCL_THROUGHPUT_LUMA,
2236 PSCL_THROUGHPUT_CHROMA: v->PSCL_THROUGHPUT_CHROMA,
2237 DPPCLK: v->DPPCLK,
2238 ReadBandwidthLuma: v->ReadBandwidthPlaneLuma,
2239 ReadBandwidthChroma: v->ReadBandwidthPlaneChroma,
2240 ReturnBusWidth: v->ReturnBusWidth,
2241 DCFCLKDeepSleep: &v->DCFCLKDeepSleep);
2242
2243 // DSCCLK
2244 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2245 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2246 v->DSCCLK_calculated[k] = 0.0;
2247 } else {
2248 if (v->OutputFormat[k] == dm_420)
2249 v->DSCFormatFactor = 2;
2250 else if (v->OutputFormat[k] == dm_444)
2251 v->DSCFormatFactor = 1;
2252 else if (v->OutputFormat[k] == dm_n422)
2253 v->DSCFormatFactor = 2;
2254 else
2255 v->DSCFormatFactor = 1;
2256 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2257 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2258 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2259 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2260 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2261 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2262 else
2263 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2264 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2265 }
2266 }
2267
2268 // DSC Delay
2269 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2270 double BPP = v->OutputBpp[k];
2271
2272 if (v->DSCEnabled[k] && BPP != 0) {
2273 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2274 v->DSCDelay[k] = dscceComputeDelay(
2275 bpc: v->DSCInputBitPerComponent[k],
2276 BPP,
2277 sliceWidth: dml_ceil(a: (double) v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1),
2278 numSlices: v->NumberOfDSCSlices[k],
2279 pixelFormat: v->OutputFormat[k],
2280 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]);
2281 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2282 v->DSCDelay[k] = 2
2283 * (dscceComputeDelay(
2284 bpc: v->DSCInputBitPerComponent[k],
2285 BPP,
2286 sliceWidth: dml_ceil(a: (double) v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1),
2287 numSlices: v->NumberOfDSCSlices[k] / 2.0,
2288 pixelFormat: v->OutputFormat[k],
2289 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]));
2290 } else {
2291 v->DSCDelay[k] = 4
2292 * (dscceComputeDelay(
2293 bpc: v->DSCInputBitPerComponent[k],
2294 BPP,
2295 sliceWidth: dml_ceil(a: (double) v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1),
2296 numSlices: v->NumberOfDSCSlices[k] / 4.0,
2297 pixelFormat: v->OutputFormat[k],
2298 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]));
2299 }
2300 v->DSCDelay[k] = v->DSCDelay[k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil(a: (double) v->DSCDelay[k] / v->HActive[k], granularity: 1);
2301 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2302 } else {
2303 v->DSCDelay[k] = 0;
2304 }
2305 }
2306
2307 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2308 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2309 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2310 v->DSCDelay[k] = v->DSCDelay[j];
2311
2312 // Prefetch
2313 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2314 unsigned int PDEAndMetaPTEBytesFrameY;
2315 unsigned int PixelPTEBytesPerRowY;
2316 unsigned int MetaRowByteY;
2317 unsigned int MetaRowByteC;
2318 unsigned int PDEAndMetaPTEBytesFrameC;
2319 unsigned int PixelPTEBytesPerRowC;
2320 bool PTEBufferSizeNotExceededY;
2321 bool PTEBufferSizeNotExceededC;
2322
2323 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2324 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2325 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2326 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2327 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2328 } else {
2329 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2330 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2331 }
2332
2333 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2334 mode_lib,
2335 DCCEnable: v->DCCEnable[k],
2336 BlockHeight256Bytes: v->BlockHeight256BytesC[k],
2337 BlockWidth256Bytes: v->BlockWidth256BytesC[k],
2338 SourcePixelFormat: v->SourcePixelFormat[k],
2339 SurfaceTiling: v->SurfaceTiling[k],
2340 BytePerPixel: v->BytePerPixelC[k],
2341 ScanDirection: v->SourceScan[k],
2342 SwathWidth: v->SwathWidthC[k],
2343 ViewportHeight: v->ViewportHeightChroma[k],
2344 GPUVMEnable: v->GPUVMEnable,
2345 HostVMEnable: v->HostVMEnable,
2346 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
2347 GPUVMMinPageSize: v->GPUVMMinPageSize,
2348 HostVMMinPageSize: v->HostVMMinPageSize,
2349 PTEBufferSizeInRequests: v->PTEBufferSizeInRequestsForChroma,
2350 Pitch: v->PitchC[k],
2351 DCCMetaPitch: v->DCCMetaPitchC[k],
2352 MacroTileWidth: &v->MacroTileWidthC[k],
2353 MetaRowByte: &MetaRowByteC,
2354 PixelPTEBytesPerRow: &PixelPTEBytesPerRowC,
2355 PTEBufferSizeNotExceeded: &PTEBufferSizeNotExceededC,
2356 dpte_row_width_ub: &v->dpte_row_width_chroma_ub[k],
2357 dpte_row_height: &v->dpte_row_height_chroma[k],
2358 MetaRequestWidth: &v->meta_req_width_chroma[k],
2359 MetaRequestHeight: &v->meta_req_height_chroma[k],
2360 meta_row_width: &v->meta_row_width_chroma[k],
2361 meta_row_height: &v->meta_row_height_chroma[k],
2362 vm_group_bytes: &v->dummyinteger1,
2363 dpte_group_bytes: &v->dummyinteger2,
2364 PixelPTEReqWidth: &v->PixelPTEReqWidthC[k],
2365 PixelPTEReqHeight: &v->PixelPTEReqHeightC[k],
2366 PTERequestSize: &v->PTERequestSizeC[k],
2367 DPDE0BytesFrame: &v->dpde0_bytes_per_frame_ub_c[k],
2368 MetaPTEBytesFrame: &v->meta_pte_bytes_per_frame_ub_c[k]);
2369
2370 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2371 mode_lib,
2372 VRatio: v->VRatioChroma[k],
2373 vtaps: v->VTAPsChroma[k],
2374 Interlace: v->Interlace[k],
2375 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
2376 SwathHeight: v->SwathHeightC[k],
2377 ViewportYStart: v->ViewportYStartC[k],
2378 VInitPreFill: &v->VInitPreFillC[k],
2379 MaxNumSwath: &v->MaxNumSwathC[k]);
2380 } else {
2381 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2382 v->PTEBufferSizeInRequestsForChroma = 0;
2383 PixelPTEBytesPerRowC = 0;
2384 PDEAndMetaPTEBytesFrameC = 0;
2385 MetaRowByteC = 0;
2386 v->MaxNumSwathC[k] = 0;
2387 v->PrefetchSourceLinesC[k] = 0;
2388 }
2389
2390 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2391 mode_lib,
2392 DCCEnable: v->DCCEnable[k],
2393 BlockHeight256Bytes: v->BlockHeight256BytesY[k],
2394 BlockWidth256Bytes: v->BlockWidth256BytesY[k],
2395 SourcePixelFormat: v->SourcePixelFormat[k],
2396 SurfaceTiling: v->SurfaceTiling[k],
2397 BytePerPixel: v->BytePerPixelY[k],
2398 ScanDirection: v->SourceScan[k],
2399 SwathWidth: v->SwathWidthY[k],
2400 ViewportHeight: v->ViewportHeight[k],
2401 GPUVMEnable: v->GPUVMEnable,
2402 HostVMEnable: v->HostVMEnable,
2403 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
2404 GPUVMMinPageSize: v->GPUVMMinPageSize,
2405 HostVMMinPageSize: v->HostVMMinPageSize,
2406 PTEBufferSizeInRequests: v->PTEBufferSizeInRequestsForLuma,
2407 Pitch: v->PitchY[k],
2408 DCCMetaPitch: v->DCCMetaPitchY[k],
2409 MacroTileWidth: &v->MacroTileWidthY[k],
2410 MetaRowByte: &MetaRowByteY,
2411 PixelPTEBytesPerRow: &PixelPTEBytesPerRowY,
2412 PTEBufferSizeNotExceeded: &PTEBufferSizeNotExceededY,
2413 dpte_row_width_ub: &v->dpte_row_width_luma_ub[k],
2414 dpte_row_height: &v->dpte_row_height[k],
2415 MetaRequestWidth: &v->meta_req_width[k],
2416 MetaRequestHeight: &v->meta_req_height[k],
2417 meta_row_width: &v->meta_row_width[k],
2418 meta_row_height: &v->meta_row_height[k],
2419 vm_group_bytes: &v->vm_group_bytes[k],
2420 dpte_group_bytes: &v->dpte_group_bytes[k],
2421 PixelPTEReqWidth: &v->PixelPTEReqWidthY[k],
2422 PixelPTEReqHeight: &v->PixelPTEReqHeightY[k],
2423 PTERequestSize: &v->PTERequestSizeY[k],
2424 DPDE0BytesFrame: &v->dpde0_bytes_per_frame_ub_l[k],
2425 MetaPTEBytesFrame: &v->meta_pte_bytes_per_frame_ub_l[k]);
2426
2427 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2428 mode_lib,
2429 VRatio: v->VRatio[k],
2430 vtaps: v->vtaps[k],
2431 Interlace: v->Interlace[k],
2432 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
2433 SwathHeight: v->SwathHeightY[k],
2434 ViewportYStart: v->ViewportYStartY[k],
2435 VInitPreFill: &v->VInitPreFillY[k],
2436 MaxNumSwath: &v->MaxNumSwathY[k]);
2437 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2438 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2439 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2440
2441 CalculateRowBandwidth(
2442 GPUVMEnable: v->GPUVMEnable,
2443 SourcePixelFormat: v->SourcePixelFormat[k],
2444 VRatio: v->VRatio[k],
2445 VRatioChroma: v->VRatioChroma[k],
2446 DCCEnable: v->DCCEnable[k],
2447 LineTime: v->HTotal[k] / v->PixelClock[k],
2448 MetaRowByteLuma: MetaRowByteY,
2449 MetaRowByteChroma: MetaRowByteC,
2450 meta_row_height_luma: v->meta_row_height[k],
2451 meta_row_height_chroma: v->meta_row_height_chroma[k],
2452 PixelPTEBytesPerRowLuma: PixelPTEBytesPerRowY,
2453 PixelPTEBytesPerRowChroma: PixelPTEBytesPerRowC,
2454 dpte_row_height_luma: v->dpte_row_height[k],
2455 dpte_row_height_chroma: v->dpte_row_height_chroma[k],
2456 meta_row_bw: &v->meta_row_bw[k],
2457 dpte_row_bw: &v->dpte_row_bw[k]);
2458 }
2459
2460 v->TotalDCCActiveDPP = 0;
2461 v->TotalActiveDPP = 0;
2462 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2463 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2464 if (v->DCCEnable[k])
2465 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2466 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2467 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2468 NoChromaPlanes = false;
2469 }
2470
2471 ReorderBytes = v->NumberOfChannels
2472 * dml_max3(
2473 a: v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2474 b: v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2475 c: v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2476
2477 VMDataOnlyReturnBW = dml_min(
2478 a: dml_min(a: v->ReturnBusWidth * v->DCFCLK, b: v->FabricClock * v->FabricDatapathToDCNDataReturn)
2479 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2480 b: v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2481 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2482
2483#ifdef __DML_VBA_DEBUG__
2484 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2485 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2486 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2487 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2488 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2489 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2490 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2491 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2492 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2493 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2494 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2495#endif
2496
2497 if (v->GPUVMEnable && v->HostVMEnable)
2498 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2499
2500 v->UrgentExtraLatency = CalculateExtraLatency(
2501 RoundTripPingLatencyCycles: v->RoundTripPingLatencyCycles,
2502 ReorderingBytes: ReorderBytes,
2503 DCFCLK: v->DCFCLK,
2504 TotalNumberOfActiveDPP: v->TotalActiveDPP,
2505 PixelChunkSizeInKByte: v->PixelChunkSizeInKByte,
2506 TotalNumberOfDCCActiveDPP: v->TotalDCCActiveDPP,
2507 MetaChunkSize: v->MetaChunkSize,
2508 ReturnBW: v->ReturnBW,
2509 GPUVMEnable: v->GPUVMEnable,
2510 HostVMEnable: v->HostVMEnable,
2511 NumberOfActivePlanes: v->NumberOfActivePlanes,
2512 NumberOfDPP: v->DPPPerPlane,
2513 dpte_group_bytes: v->dpte_group_bytes,
2514 HostVMInefficiencyFactor,
2515 HostVMMinPageSize: v->HostVMMinPageSize,
2516 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels);
2517
2518 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2519
2520 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2521 if (v->BlendingAndTiming[k] == k) {
2522 if (v->WritebackEnable[k] == true) {
2523 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2524 + CalculateWriteBackDelay(
2525 WritebackPixelFormat: v->WritebackPixelFormat[k],
2526 WritebackHRatio: v->WritebackHRatio[k],
2527 WritebackVRatio: v->WritebackVRatio[k],
2528 WritebackVTaps: v->WritebackVTaps[k],
2529 WritebackDestinationWidth: v->WritebackDestinationWidth[k],
2530 WritebackDestinationHeight: v->WritebackDestinationHeight[k],
2531 WritebackSourceHeight: v->WritebackSourceHeight[k],
2532 HTotal: v->HTotal[k]) / v->DISPCLK;
2533 } else
2534 v->WritebackDelay[v->VoltageLevel][k] = 0;
2535 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2536 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2537 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2538 a: v->WritebackDelay[v->VoltageLevel][k],
2539 b: v->WritebackLatency
2540 + CalculateWriteBackDelay(
2541 WritebackPixelFormat: v->WritebackPixelFormat[j],
2542 WritebackHRatio: v->WritebackHRatio[j],
2543 WritebackVRatio: v->WritebackVRatio[j],
2544 WritebackVTaps: v->WritebackVTaps[j],
2545 WritebackDestinationWidth: v->WritebackDestinationWidth[j],
2546 WritebackDestinationHeight: v->WritebackDestinationHeight[j],
2547 WritebackSourceHeight: v->WritebackSourceHeight[j],
2548 HTotal: v->HTotal[k]) / v->DISPCLK);
2549 }
2550 }
2551 }
2552 }
2553
2554 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2555 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2556 if (v->BlendingAndTiming[k] == j)
2557 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2558
2559 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2560 v->MaxVStartupLines[k] =
2561 CalculateMaxVStartup(
2562 VTotal: v->VTotal[k],
2563 VActive: v->VActive[k],
2564 VBlankNom: v->VBlankNom[k],
2565 HTotal: v->HTotal[k],
2566 PixelClock: v->PixelClock[k],
2567 ProgressiveTointerlaceUnitinOPP: v->ProgressiveToInterlaceUnitInOPP,
2568 Interlace: v->Interlace[k],
2569 VBlankNomDefaultUS: v->ip.VBlankNomDefaultUS,
2570 WritebackDelayTime: v->WritebackDelay[v->VoltageLevel][k]);
2571
2572#ifdef __DML_VBA_DEBUG__
2573 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2574 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2575 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2576#endif
2577 }
2578
2579 v->MaximumMaxVStartupLines = 0;
2580 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2581 v->MaximumMaxVStartupLines = dml_max(a: v->MaximumMaxVStartupLines, b: v->MaxVStartupLines[k]);
2582
2583 // VBA_DELTA
2584 // We don't really care to iterate between the various prefetch modes
2585 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2586
2587 v->UrgentLatency = CalculateUrgentLatency(
2588 UrgentLatencyPixelDataOnly: v->UrgentLatencyPixelDataOnly,
2589 UrgentLatencyPixelMixedWithVMData: v->UrgentLatencyPixelMixedWithVMData,
2590 UrgentLatencyVMDataOnly: v->UrgentLatencyVMDataOnly,
2591 DoUrgentLatencyAdjustment: v->DoUrgentLatencyAdjustment,
2592 UrgentLatencyAdjustmentFabricClockComponent: v->UrgentLatencyAdjustmentFabricClockComponent,
2593 UrgentLatencyAdjustmentFabricClockReference: v->UrgentLatencyAdjustmentFabricClockReference,
2594 FabricClockSingle: v->FabricClock);
2595
2596 v->FractionOfUrgentBandwidth = 0.0;
2597 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2598
2599 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2600
2601 do {
2602 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2603 bool DestinationLineTimesForPrefetchLessThan2 = false;
2604 bool VRatioPrefetchMoreThan4 = false;
2605 double TWait = CalculateTWait(PrefetchMode, DRAMClockChangeLatency: v->DRAMClockChangeLatency, UrgentLatency: v->UrgentLatency, SREnterPlusExitTime: v->SREnterPlusExitTime);
2606
2607 MaxTotalRDBandwidth = 0;
2608
2609 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2610
2611 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2612 Pipe myPipe;
2613
2614 myPipe.DPPCLK = v->DPPCLK[k];
2615 myPipe.DISPCLK = v->DISPCLK;
2616 myPipe.PixelClock = v->PixelClock[k];
2617 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2618 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2619 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2620 myPipe.VRatio = v->VRatio[k];
2621 myPipe.VRatioChroma = v->VRatioChroma[k];
2622 myPipe.SourceScan = v->SourceScan[k];
2623 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2624 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2625 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2626 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2627 myPipe.InterlaceEnable = v->Interlace[k];
2628 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2629 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2630 myPipe.HTotal = v->HTotal[k];
2631 myPipe.DCCEnable = v->DCCEnable[k];
2632 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2633 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2634 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2635 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2636 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2637 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2638 v->ErrorResult[k] = CalculatePrefetchSchedule(
2639 mode_lib,
2640 HostVMInefficiencyFactor,
2641 myPipe: &myPipe,
2642 DSCDelay: v->DSCDelay[k],
2643 DPPCLKDelaySubtotalPlusCNVCFormater: v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2644 DPPCLKDelaySCL: v->DPPCLKDelaySCL,
2645 DPPCLKDelaySCLLBOnly: v->DPPCLKDelaySCLLBOnly,
2646 DPPCLKDelayCNVCCursor: v->DPPCLKDelayCNVCCursor,
2647 DISPCLKDelaySubtotal: v->DISPCLKDelaySubtotal,
2648 DPP_RECOUT_WIDTH: (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2649 OutputFormat: v->OutputFormat[k],
2650 MaxInterDCNTileRepeaters: v->MaxInterDCNTileRepeaters,
2651 VStartup: dml_min(a: v->VStartupLines, b: v->MaxVStartupLines[k]),
2652 MaxVStartup: v->MaxVStartupLines[k],
2653 GPUVMPageTableLevels: v->GPUVMMaxPageTableLevels,
2654 GPUVMEnable: v->GPUVMEnable,
2655 HostVMEnable: v->HostVMEnable,
2656 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
2657 HostVMMinPageSize: v->HostVMMinPageSize,
2658 DynamicMetadataEnable: v->DynamicMetadataEnable[k],
2659 DynamicMetadataVMEnabled: v->DynamicMetadataVMEnabled,
2660 DynamicMetadataLinesBeforeActiveRequired: v->DynamicMetadataLinesBeforeActiveRequired[k],
2661 DynamicMetadataTransmittedBytes: v->DynamicMetadataTransmittedBytes[k],
2662 UrgentLatency: v->UrgentLatency,
2663 UrgentExtraLatency: v->UrgentExtraLatency,
2664 TCalc: v->TCalc,
2665 PDEAndMetaPTEBytesFrame: v->PDEAndMetaPTEBytesFrame[k],
2666 MetaRowByte: v->MetaRowByte[k],
2667 PixelPTEBytesPerRow: v->PixelPTEBytesPerRow[k],
2668 PrefetchSourceLinesY: v->PrefetchSourceLinesY[k],
2669 SwathWidthY: v->SwathWidthY[k],
2670 VInitPreFillY: v->VInitPreFillY[k],
2671 MaxNumSwathY: v->MaxNumSwathY[k],
2672 PrefetchSourceLinesC: v->PrefetchSourceLinesC[k],
2673 SwathWidthC: v->SwathWidthC[k],
2674 VInitPreFillC: v->VInitPreFillC[k],
2675 MaxNumSwathC: v->MaxNumSwathC[k],
2676 swath_width_luma_ub: v->swath_width_luma_ub[k],
2677 swath_width_chroma_ub: v->swath_width_chroma_ub[k],
2678 SwathHeightY: v->SwathHeightY[k],
2679 SwathHeightC: v->SwathHeightC[k],
2680 TWait,
2681 DSTXAfterScaler: &v->DSTXAfterScaler[k],
2682 DSTYAfterScaler: &v->DSTYAfterScaler[k],
2683 DestinationLinesForPrefetch: &v->DestinationLinesForPrefetch[k],
2684 PrefetchBandwidth: &v->PrefetchBandwidth[k],
2685 DestinationLinesToRequestVMInVBlank: &v->DestinationLinesToRequestVMInVBlank[k],
2686 DestinationLinesToRequestRowInVBlank: &v->DestinationLinesToRequestRowInVBlank[k],
2687 VRatioPrefetchY: &v->VRatioPrefetchY[k],
2688 VRatioPrefetchC: &v->VRatioPrefetchC[k],
2689 RequiredPrefetchPixDataBWLuma: &v->RequiredPrefetchPixDataBWLuma[k],
2690 RequiredPrefetchPixDataBWChroma: &v->RequiredPrefetchPixDataBWChroma[k],
2691 NotEnoughTimeForDynamicMetadata: &v->NotEnoughTimeForDynamicMetadata[k],
2692 Tno_bw: &v->Tno_bw[k],
2693 prefetch_vmrow_bw: &v->prefetch_vmrow_bw[k],
2694 Tdmdl_vm: &v->Tdmdl_vm[k],
2695 Tdmdl: &v->Tdmdl[k],
2696 TSetup: &v->TSetup[k],
2697 VUpdateOffsetPix: &v->VUpdateOffsetPix[k],
2698 VUpdateWidthPix: &v->VUpdateWidthPix[k],
2699 VReadyOffsetPix: &v->VReadyOffsetPix[k]);
2700
2701#ifdef __DML_VBA_DEBUG__
2702 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2703#endif
2704 v->VStartup[k] = dml_min(a: v->VStartupLines, b: v->MaxVStartupLines[k]);
2705 }
2706
2707 v->NoEnoughUrgentLatencyHiding = false;
2708 v->NoEnoughUrgentLatencyHidingPre = false;
2709
2710 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2711 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2712 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2713 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2714 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2715
2716 CalculateUrgentBurstFactor(
2717 swath_width_luma_ub: v->swath_width_luma_ub[k],
2718 swath_width_chroma_ub: v->swath_width_chroma_ub[k],
2719 SwathHeightY: v->SwathHeightY[k],
2720 SwathHeightC: v->SwathHeightC[k],
2721 LineTime: v->HTotal[k] / v->PixelClock[k],
2722 UrgentLatency: v->UrgentLatency,
2723 CursorBufferSize: v->CursorBufferSize,
2724 CursorWidth: v->CursorWidth[k][0],
2725 CursorBPP: v->CursorBPP[k][0],
2726 VRatio: v->VRatio[k],
2727 VRatioC: v->VRatioChroma[k],
2728 BytePerPixelInDETY: v->BytePerPixelDETY[k],
2729 BytePerPixelInDETC: v->BytePerPixelDETC[k],
2730 DETBufferSizeY: v->DETBufferSizeY[k],
2731 DETBufferSizeC: v->DETBufferSizeC[k],
2732 UrgentBurstFactorCursor: &v->UrgBurstFactorCursor[k],
2733 UrgentBurstFactorLuma: &v->UrgBurstFactorLuma[k],
2734 UrgentBurstFactorChroma: &v->UrgBurstFactorChroma[k],
2735 NotEnoughUrgentLatencyHiding: &v->NoUrgentLatencyHiding[k]);
2736
2737 CalculateUrgentBurstFactor(
2738 swath_width_luma_ub: v->swath_width_luma_ub[k],
2739 swath_width_chroma_ub: v->swath_width_chroma_ub[k],
2740 SwathHeightY: v->SwathHeightY[k],
2741 SwathHeightC: v->SwathHeightC[k],
2742 LineTime: v->HTotal[k] / v->PixelClock[k],
2743 UrgentLatency: v->UrgentLatency,
2744 CursorBufferSize: v->CursorBufferSize,
2745 CursorWidth: v->CursorWidth[k][0],
2746 CursorBPP: v->CursorBPP[k][0],
2747 VRatio: v->VRatioPrefetchY[k],
2748 VRatioC: v->VRatioPrefetchC[k],
2749 BytePerPixelInDETY: v->BytePerPixelDETY[k],
2750 BytePerPixelInDETC: v->BytePerPixelDETC[k],
2751 DETBufferSizeY: v->DETBufferSizeY[k],
2752 DETBufferSizeC: v->DETBufferSizeC[k],
2753 UrgentBurstFactorCursor: &v->UrgBurstFactorCursorPre[k],
2754 UrgentBurstFactorLuma: &v->UrgBurstFactorLumaPre[k],
2755 UrgentBurstFactorChroma: &v->UrgBurstFactorChromaPre[k],
2756 NotEnoughUrgentLatencyHiding: &v->NoUrgentLatencyHidingPre[k]);
2757
2758 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2759 + dml_max3(
2760 a: v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2761 b: v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2762 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2763 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2764 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2765 c: v->DPPPerPlane[k]
2766 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2767 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2768 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2769
2770 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2771 + dml_max3(
2772 a: v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2773 b: v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2774 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2775 c: v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2776 + v->cursor_bw_pre[k]);
2777
2778#ifdef __DML_VBA_DEBUG__
2779 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2780 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2781 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2782 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2783 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2784
2785 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2786 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2787
2788 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2789 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2790 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2791 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2792 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2793 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2794 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2795 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2796 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2797 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2798#endif
2799
2800 if (v->DestinationLinesForPrefetch[k] < 2)
2801 DestinationLineTimesForPrefetchLessThan2 = true;
2802
2803 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2804 VRatioPrefetchMoreThan4 = true;
2805
2806 if (v->NoUrgentLatencyHiding[k] == true)
2807 v->NoEnoughUrgentLatencyHiding = true;
2808
2809 if (v->NoUrgentLatencyHidingPre[k] == true)
2810 v->NoEnoughUrgentLatencyHidingPre = true;
2811 }
2812
2813 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2814
2815#ifdef __DML_VBA_DEBUG__
2816 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2817 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW);
2818 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth);
2819#endif
2820
2821 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2822 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2823 v->PrefetchModeSupported = true;
2824 else {
2825 v->PrefetchModeSupported = false;
2826 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2827 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2828 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2829 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2830 }
2831
2832 // PREVIOUS_ERROR
2833 // This error result check was done after the PrefetchModeSupported. So we will
2834 // still try to calculate flip schedule even prefetch mode not supported
2835 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2836 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2837 v->PrefetchModeSupported = false;
2838 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2839 }
2840 }
2841
2842 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2843 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2844 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2845 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2846 - dml_max(
2847 a: v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2848 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2849 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2850 b: v->DPPPerPlane[k]
2851 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2852 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2853 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2854 }
2855
2856 v->TotImmediateFlipBytes = 0;
2857 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2858 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2859 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2860 }
2861 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2862 CalculateFlipSchedule(
2863 mode_lib,
2864 k,
2865 HostVMInefficiencyFactor,
2866 UrgentExtraLatency: v->UrgentExtraLatency,
2867 UrgentLatency: v->UrgentLatency,
2868 PDEAndMetaPTEBytesPerFrame: v->PDEAndMetaPTEBytesFrame[k],
2869 MetaRowBytes: v->MetaRowByte[k],
2870 DPTEBytesPerRow: v->PixelPTEBytesPerRow[k]);
2871 }
2872
2873 v->total_dcn_read_bw_with_flip = 0.0;
2874 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2875 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2876 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2877 + dml_max3(
2878 a: v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2879 b: v->DPPPerPlane[k] * v->final_flip_bw[k]
2880 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2881 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2882 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2883 c: v->DPPPerPlane[k]
2884 * (v->final_flip_bw[k]
2885 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2886 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2887 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2888 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2889 + dml_max3(
2890 a: v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2891 b: v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2892 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2893 c: v->DPPPerPlane[k]
2894 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2895 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2896 }
2897 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2898
2899 v->ImmediateFlipSupported = true;
2900 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2901#ifdef __DML_VBA_DEBUG__
2902 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2903#endif
2904 v->ImmediateFlipSupported = false;
2905 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2906 }
2907 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2908 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2909#ifdef __DML_VBA_DEBUG__
2910 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k);
2911#endif
2912 v->ImmediateFlipSupported = false;
2913 }
2914 }
2915 } else {
2916 v->ImmediateFlipSupported = false;
2917 }
2918
2919 v->PrefetchAndImmediateFlipSupported =
2920 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2921 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2922 v->ImmediateFlipSupported)) ? true : false;
2923#ifdef __DML_VBA_DEBUG__
2924 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2925 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required);
2926 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2927 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2928 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2929 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2930#endif
2931 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2932
2933 v->VStartupLines = v->VStartupLines + 1;
2934 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2935 ASSERT(v->PrefetchAndImmediateFlipSupported);
2936
2937 // Unbounded Request Enabled
2938 CalculateUnboundedRequestAndCompressedBufferSize(
2939 DETBufferSizeInKByte: v->DETBufferSizeInKByte[0],
2940 ConfigReturnBufferSizeInKByte: v->ConfigReturnBufferSizeInKByte,
2941 UseUnboundedRequestingFinal: v->UseUnboundedRequesting,
2942 TotalActiveDPP: v->TotalActiveDPP,
2943 NoChromaPlanes,
2944 MaxNumDPP: v->MaxNumDPP,
2945 CompressedBufferSegmentSizeInkByteFinal: v->CompressedBufferSegmentSizeInkByte,
2946 Output: v->Output,
2947 UnboundedRequestEnabled: &v->UnboundedRequestEnabled,
2948 CompressedBufferSizeInkByte: &v->CompressedBufferSizeInkByte);
2949
2950 //Watermarks and NB P-State/DRAM Clock Change Support
2951 {
2952 enum clock_change_support DRAMClockChangeSupport; // dummy
2953
2954 CalculateWatermarksAndDRAMSpeedChangeSupport(
2955 mode_lib,
2956 PrefetchMode,
2957 DCFCLK: v->DCFCLK,
2958 ReturnBW: v->ReturnBW,
2959 UrgentLatency: v->UrgentLatency,
2960 ExtraLatency: v->UrgentExtraLatency,
2961 SOCCLK: v->SOCCLK,
2962 DCFCLKDeepSleep: v->DCFCLKDeepSleep,
2963 DETBufferSizeY: v->DETBufferSizeY,
2964 DETBufferSizeC: v->DETBufferSizeC,
2965 SwathHeightY: v->SwathHeightY,
2966 SwathHeightC: v->SwathHeightC,
2967 SwathWidthY: v->SwathWidthY,
2968 SwathWidthC: v->SwathWidthC,
2969 DPPPerPlane: v->DPPPerPlane,
2970 BytePerPixelDETY: v->BytePerPixelDETY,
2971 BytePerPixelDETC: v->BytePerPixelDETC,
2972 UnboundedRequestEnabled: v->UnboundedRequestEnabled,
2973 CompressedBufferSizeInkByte: v->CompressedBufferSizeInkByte,
2974 DRAMClockChangeSupport: &DRAMClockChangeSupport,
2975 StutterExitWatermark: &v->StutterExitWatermark,
2976 StutterEnterPlusExitWatermark: &v->StutterEnterPlusExitWatermark,
2977 Z8StutterExitWatermark: &v->Z8StutterExitWatermark,
2978 Z8StutterEnterPlusExitWatermark: &v->Z8StutterEnterPlusExitWatermark);
2979
2980 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2981 if (v->WritebackEnable[k] == true) {
2982 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2983 a: 0,
2984 b: v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2985 } else {
2986 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2987 }
2988 }
2989 }
2990
2991 //Display Pipeline Delivery Time in Prefetch, Groups
2992 CalculatePixelDeliveryTimes(
2993 NumberOfActivePlanes: v->NumberOfActivePlanes,
2994 VRatio: v->VRatio,
2995 VRatioChroma: v->VRatioChroma,
2996 VRatioPrefetchY: v->VRatioPrefetchY,
2997 VRatioPrefetchC: v->VRatioPrefetchC,
2998 swath_width_luma_ub: v->swath_width_luma_ub,
2999 swath_width_chroma_ub: v->swath_width_chroma_ub,
3000 DPPPerPlane: v->DPPPerPlane,
3001 HRatio: v->HRatio,
3002 HRatioChroma: v->HRatioChroma,
3003 PixelClock: v->PixelClock,
3004 PSCL_THROUGHPUT: v->PSCL_THROUGHPUT_LUMA,
3005 PSCL_THROUGHPUT_CHROMA: v->PSCL_THROUGHPUT_CHROMA,
3006 DPPCLK: v->DPPCLK,
3007 BytePerPixelC: v->BytePerPixelC,
3008 SourceScan: v->SourceScan,
3009 NumberOfCursors: v->NumberOfCursors,
3010 CursorWidth: v->CursorWidth,
3011 CursorBPP: v->CursorBPP,
3012 BlockWidth256BytesY: v->BlockWidth256BytesY,
3013 BlockHeight256BytesY: v->BlockHeight256BytesY,
3014 BlockWidth256BytesC: v->BlockWidth256BytesC,
3015 BlockHeight256BytesC: v->BlockHeight256BytesC,
3016 DisplayPipeLineDeliveryTimeLuma: v->DisplayPipeLineDeliveryTimeLuma,
3017 DisplayPipeLineDeliveryTimeChroma: v->DisplayPipeLineDeliveryTimeChroma,
3018 DisplayPipeLineDeliveryTimeLumaPrefetch: v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3019 DisplayPipeLineDeliveryTimeChromaPrefetch: v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3020 DisplayPipeRequestDeliveryTimeLuma: v->DisplayPipeRequestDeliveryTimeLuma,
3021 DisplayPipeRequestDeliveryTimeChroma: v->DisplayPipeRequestDeliveryTimeChroma,
3022 DisplayPipeRequestDeliveryTimeLumaPrefetch: v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3023 DisplayPipeRequestDeliveryTimeChromaPrefetch: v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3024 CursorRequestDeliveryTime: v->CursorRequestDeliveryTime,
3025 CursorRequestDeliveryTimePrefetch: v->CursorRequestDeliveryTimePrefetch);
3026
3027 CalculateMetaAndPTETimes(
3028 NumberOfActivePlanes: v->NumberOfActivePlanes,
3029 GPUVMEnable: v->GPUVMEnable,
3030 MetaChunkSize: v->MetaChunkSize,
3031 MinMetaChunkSizeBytes: v->MinMetaChunkSizeBytes,
3032 HTotal: v->HTotal,
3033 VRatio: v->VRatio,
3034 VRatioChroma: v->VRatioChroma,
3035 DestinationLinesToRequestRowInVBlank: v->DestinationLinesToRequestRowInVBlank,
3036 DestinationLinesToRequestRowInImmediateFlip: v->DestinationLinesToRequestRowInImmediateFlip,
3037 DCCEnable: v->DCCEnable,
3038 PixelClock: v->PixelClock,
3039 BytePerPixelY: v->BytePerPixelY,
3040 BytePerPixelC: v->BytePerPixelC,
3041 SourceScan: v->SourceScan,
3042 dpte_row_height: v->dpte_row_height,
3043 dpte_row_height_chroma: v->dpte_row_height_chroma,
3044 meta_row_width: v->meta_row_width,
3045 meta_row_width_chroma: v->meta_row_width_chroma,
3046 meta_row_height: v->meta_row_height,
3047 meta_row_height_chroma: v->meta_row_height_chroma,
3048 meta_req_width: v->meta_req_width,
3049 meta_req_width_chroma: v->meta_req_width_chroma,
3050 meta_req_height: v->meta_req_height,
3051 meta_req_height_chroma: v->meta_req_height_chroma,
3052 dpte_group_bytes: v->dpte_group_bytes,
3053 PTERequestSizeY: v->PTERequestSizeY,
3054 PTERequestSizeC: v->PTERequestSizeC,
3055 PixelPTEReqWidthY: v->PixelPTEReqWidthY,
3056 PixelPTEReqHeightY: v->PixelPTEReqHeightY,
3057 PixelPTEReqWidthC: v->PixelPTEReqWidthC,
3058 PixelPTEReqHeightC: v->PixelPTEReqHeightC,
3059 dpte_row_width_luma_ub: v->dpte_row_width_luma_ub,
3060 dpte_row_width_chroma_ub: v->dpte_row_width_chroma_ub,
3061 DST_Y_PER_PTE_ROW_NOM_L: v->DST_Y_PER_PTE_ROW_NOM_L,
3062 DST_Y_PER_PTE_ROW_NOM_C: v->DST_Y_PER_PTE_ROW_NOM_C,
3063 DST_Y_PER_META_ROW_NOM_L: v->DST_Y_PER_META_ROW_NOM_L,
3064 DST_Y_PER_META_ROW_NOM_C: v->DST_Y_PER_META_ROW_NOM_C,
3065 TimePerMetaChunkNominal: v->TimePerMetaChunkNominal,
3066 TimePerChromaMetaChunkNominal: v->TimePerChromaMetaChunkNominal,
3067 TimePerMetaChunkVBlank: v->TimePerMetaChunkVBlank,
3068 TimePerChromaMetaChunkVBlank: v->TimePerChromaMetaChunkVBlank,
3069 TimePerMetaChunkFlip: v->TimePerMetaChunkFlip,
3070 TimePerChromaMetaChunkFlip: v->TimePerChromaMetaChunkFlip,
3071 time_per_pte_group_nom_luma: v->time_per_pte_group_nom_luma,
3072 time_per_pte_group_vblank_luma: v->time_per_pte_group_vblank_luma,
3073 time_per_pte_group_flip_luma: v->time_per_pte_group_flip_luma,
3074 time_per_pte_group_nom_chroma: v->time_per_pte_group_nom_chroma,
3075 time_per_pte_group_vblank_chroma: v->time_per_pte_group_vblank_chroma,
3076 time_per_pte_group_flip_chroma: v->time_per_pte_group_flip_chroma);
3077
3078 CalculateVMGroupAndRequestTimes(
3079 NumberOfActivePlanes: v->NumberOfActivePlanes,
3080 GPUVMEnable: v->GPUVMEnable,
3081 GPUVMMaxPageTableLevels: v->GPUVMMaxPageTableLevels,
3082 HTotal: v->HTotal,
3083 BytePerPixelC: v->BytePerPixelC,
3084 DestinationLinesToRequestVMInVBlank: v->DestinationLinesToRequestVMInVBlank,
3085 DestinationLinesToRequestVMInImmediateFlip: v->DestinationLinesToRequestVMInImmediateFlip,
3086 DCCEnable: v->DCCEnable,
3087 PixelClock: v->PixelClock,
3088 dpte_row_width_luma_ub: v->dpte_row_width_luma_ub,
3089 dpte_row_width_chroma_ub: v->dpte_row_width_chroma_ub,
3090 vm_group_bytes: v->vm_group_bytes,
3091 dpde0_bytes_per_frame_ub_l: v->dpde0_bytes_per_frame_ub_l,
3092 dpde0_bytes_per_frame_ub_c: v->dpde0_bytes_per_frame_ub_c,
3093 meta_pte_bytes_per_frame_ub_l: v->meta_pte_bytes_per_frame_ub_l,
3094 meta_pte_bytes_per_frame_ub_c: v->meta_pte_bytes_per_frame_ub_c,
3095 TimePerVMGroupVBlank: v->TimePerVMGroupVBlank,
3096 TimePerVMGroupFlip: v->TimePerVMGroupFlip,
3097 TimePerVMRequestVBlank: v->TimePerVMRequestVBlank,
3098 TimePerVMRequestFlip: v->TimePerVMRequestFlip);
3099
3100 // Min TTUVBlank
3101 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3102 if (PrefetchMode == 0) {
3103 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3104 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3105 v->MinTTUVBlank[k] = dml_max(
3106 a: v->DRAMClockChangeWatermark,
3107 b: dml_max(a: v->StutterEnterPlusExitWatermark, b: v->UrgentWatermark));
3108 } else if (PrefetchMode == 1) {
3109 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3110 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3111 v->MinTTUVBlank[k] = dml_max(a: v->StutterEnterPlusExitWatermark, b: v->UrgentWatermark);
3112 } else {
3113 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3114 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3115 v->MinTTUVBlank[k] = v->UrgentWatermark;
3116 }
3117 if (!v->DynamicMetadataEnable[k])
3118 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3119 }
3120
3121 // DCC Configuration
3122 v->ActiveDPPs = 0;
3123 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3124 CalculateDCCConfiguration(DCCEnabled: v->DCCEnable[k], DCCProgrammingAssumesScanDirectionUnknown: false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3125 SourcePixelFormat: v->SourcePixelFormat[k],
3126 SurfaceWidthLuma: v->SurfaceWidthY[k],
3127 SurfaceWidthChroma: v->SurfaceWidthC[k],
3128 SurfaceHeightLuma: v->SurfaceHeightY[k],
3129 SurfaceHeightChroma: v->SurfaceHeightC[k],
3130 DETBufferSize: v->DETBufferSizeInKByte[0] * 1024,
3131 RequestHeight256ByteLuma: v->BlockHeight256BytesY[k],
3132 RequestHeight256ByteChroma: v->BlockHeight256BytesC[k],
3133 TilingFormat: v->SurfaceTiling[k],
3134 BytePerPixelY: v->BytePerPixelY[k],
3135 BytePerPixelC: v->BytePerPixelC[k],
3136 BytePerPixelDETY: v->BytePerPixelDETY[k],
3137 BytePerPixelDETC: v->BytePerPixelDETC[k],
3138 ScanOrientation: v->SourceScan[k],
3139 MaxUncompressedBlockLuma: &v->DCCYMaxUncompressedBlock[k],
3140 MaxUncompressedBlockChroma: &v->DCCCMaxUncompressedBlock[k],
3141 MaxCompressedBlockLuma: &v->DCCYMaxCompressedBlock[k],
3142 MaxCompressedBlockChroma: &v->DCCCMaxCompressedBlock[k],
3143 IndependentBlockLuma: &v->DCCYIndependentBlock[k],
3144 IndependentBlockChroma: &v->DCCCIndependentBlock[k]);
3145 }
3146
3147 // VStartup Adjustment
3148 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3149 bool isInterlaceTiming;
3150 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3151#ifdef __DML_VBA_DEBUG__
3152 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3153#endif
3154
3155 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3156
3157#ifdef __DML_VBA_DEBUG__
3158 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3159 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3160 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3161 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3162#endif
3163
3164 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3165 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3166 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3167 }
3168
3169 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3170 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3171 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) {
3172 v->MIN_DST_Y_NEXT_START[k] = dml_floor(a: (v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, granularity: 1.0);
3173 } else {
3174 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k];
3175 }
3176 v->MIN_DST_Y_NEXT_START[k] += dml_floor(a: 4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), granularity: 1.0) / 4.0;
3177 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3178 <= (isInterlaceTiming ?
3179 dml_floor(a: (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, granularity: 1.0) :
3180 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3181 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3182 } else {
3183 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3184 }
3185#ifdef __DML_VBA_DEBUG__
3186 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3187 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3188 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3189 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3190 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3191 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3192 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3193 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3194 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3195 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3196 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3197#endif
3198 }
3199
3200 {
3201 //Maximum Bandwidth Used
3202 double TotalWRBandwidth = 0;
3203 double MaxPerPlaneVActiveWRBandwidth = 0;
3204 double WRBandwidth = 0;
3205
3206 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3207 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3208 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3209 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3210 } else if (v->WritebackEnable[k] == true) {
3211 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3212 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3213 }
3214 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3215 MaxPerPlaneVActiveWRBandwidth = dml_max(a: MaxPerPlaneVActiveWRBandwidth, b: WRBandwidth);
3216 }
3217
3218 v->TotalDataReadBandwidth = 0;
3219 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3220 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3221 }
3222 }
3223 // Stutter Efficiency
3224 CalculateStutterEfficiency(
3225 mode_lib,
3226 CompressedBufferSizeInkByte: v->CompressedBufferSizeInkByte,
3227 UnboundedRequestEnabled: v->UnboundedRequestEnabled,
3228 ConfigReturnBufferSizeInKByte: v->ConfigReturnBufferSizeInKByte,
3229 MetaFIFOSizeInKEntries: v->MetaFIFOSizeInKEntries,
3230 ZeroSizeBufferEntries: v->ZeroSizeBufferEntries,
3231 NumberOfActivePlanes: v->NumberOfActivePlanes,
3232 ROBBufferSizeInKByte: v->ROBBufferSizeInKByte,
3233 TotalDataReadBandwidth: v->TotalDataReadBandwidth,
3234 DCFCLK: v->DCFCLK,
3235 ReturnBW: v->ReturnBW,
3236 COMPBUF_RESERVED_SPACE_64B: v->COMPBUF_RESERVED_SPACE_64B,
3237 COMPBUF_RESERVED_SPACE_ZS: v->COMPBUF_RESERVED_SPACE_ZS,
3238 SRExitTime: v->SRExitTime,
3239 SRExitZ8Time: v->SRExitZ8Time,
3240 SynchronizedVBlank: v->SynchronizedVBlank,
3241 Z8StutterEnterPlusExitWatermark: v->StutterEnterPlusExitWatermark,
3242 StutterEnterPlusExitWatermark: v->Z8StutterEnterPlusExitWatermark,
3243 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
3244 Interlace: v->Interlace,
3245 MinTTUVBlank: v->MinTTUVBlank,
3246 DPPPerPlane: v->DPPPerPlane,
3247 DETBufferSizeY: v->DETBufferSizeY,
3248 BytePerPixelY: v->BytePerPixelY,
3249 BytePerPixelDETY: v->BytePerPixelDETY,
3250 SwathWidthY: v->SwathWidthY,
3251 SwathHeightY: v->SwathHeightY,
3252 SwathHeightC: v->SwathHeightC,
3253 NetDCCRateLuma: v->DCCRateLuma,
3254 NetDCCRateChroma: v->DCCRateChroma,
3255 DCCFractionOfZeroSizeRequestsLuma: v->DCCFractionOfZeroSizeRequestsLuma,
3256 DCCFractionOfZeroSizeRequestsChroma: v->DCCFractionOfZeroSizeRequestsChroma,
3257 HTotal: v->HTotal,
3258 VTotal: v->VTotal,
3259 PixelClock: v->PixelClock,
3260 VRatio: v->VRatio,
3261 SourceScan: v->SourceScan,
3262 BlockHeight256BytesY: v->BlockHeight256BytesY,
3263 BlockWidth256BytesY: v->BlockWidth256BytesY,
3264 BlockHeight256BytesC: v->BlockHeight256BytesC,
3265 BlockWidth256BytesC: v->BlockWidth256BytesC,
3266 DCCYMaxUncompressedBlock: v->DCCYMaxUncompressedBlock,
3267 DCCCMaxUncompressedBlock: v->DCCCMaxUncompressedBlock,
3268 VActive: v->VActive,
3269 DCCEnable: v->DCCEnable,
3270 WritebackEnable: v->WritebackEnable,
3271 ReadBandwidthPlaneLuma: v->ReadBandwidthPlaneLuma,
3272 ReadBandwidthPlaneChroma: v->ReadBandwidthPlaneChroma,
3273 meta_row_bw: v->meta_row_bw,
3274 dpte_row_bw: v->dpte_row_bw,
3275 StutterEfficiencyNotIncludingVBlank: &v->StutterEfficiencyNotIncludingVBlank,
3276 StutterEfficiency: &v->StutterEfficiency,
3277 NumberOfStutterBurstsPerFrame: &v->NumberOfStutterBurstsPerFrame,
3278 Z8StutterEfficiencyNotIncludingVBlank: &v->Z8StutterEfficiencyNotIncludingVBlank,
3279 Z8StutterEfficiency: &v->Z8StutterEfficiency,
3280 Z8NumberOfStutterBurstsPerFrame: &v->Z8NumberOfStutterBurstsPerFrame,
3281 StutterPeriod: &v->StutterPeriod);
3282}
3283
3284static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3285{
3286 struct vba_vars_st *v = &mode_lib->vba;
3287 // Display Pipe Configuration
3288 double BytePerPixDETY[DC__NUM_DPP__MAX];
3289 double BytePerPixDETC[DC__NUM_DPP__MAX];
3290 int BytePerPixY[DC__NUM_DPP__MAX];
3291 int BytePerPixC[DC__NUM_DPP__MAX];
3292 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3293 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3294 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3295 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3296 double dummy1[DC__NUM_DPP__MAX];
3297 double dummy2[DC__NUM_DPP__MAX];
3298 double dummy3[DC__NUM_DPP__MAX];
3299 double dummy4[DC__NUM_DPP__MAX];
3300 int dummy5[DC__NUM_DPP__MAX];
3301 int dummy6[DC__NUM_DPP__MAX];
3302 bool dummy7[DC__NUM_DPP__MAX];
3303 bool dummysinglestring;
3304
3305 unsigned int k;
3306
3307 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3308
3309 CalculateBytePerPixelAnd256BBlockSizes(
3310 SourcePixelFormat: v->SourcePixelFormat[k],
3311 SurfaceTiling: v->SurfaceTiling[k],
3312 BytePerPixelY: &BytePerPixY[k],
3313 BytePerPixelC: &BytePerPixC[k],
3314 BytePerPixelDETY: &BytePerPixDETY[k],
3315 BytePerPixelDETC: &BytePerPixDETC[k],
3316 BlockHeight256BytesY: &Read256BytesBlockHeightY[k],
3317 BlockHeight256BytesC: &Read256BytesBlockHeightC[k],
3318 BlockWidth256BytesY: &Read256BytesBlockWidthY[k],
3319 BlockWidth256BytesC: &Read256BytesBlockWidthC[k]);
3320 }
3321
3322 CalculateSwathAndDETConfiguration(
3323 ForceSingleDPP: false,
3324 NumberOfActivePlanes: v->NumberOfActivePlanes,
3325 DETBufferSizeInKByte: v->DETBufferSizeInKByte[0],
3326 MaximumSwathWidthLuma: dummy1,
3327 MaximumSwathWidthChroma: dummy2,
3328 SourceScan: v->SourceScan,
3329 SourcePixelFormat: v->SourcePixelFormat,
3330 SurfaceTiling: v->SurfaceTiling,
3331 ViewportWidth: v->ViewportWidth,
3332 ViewportHeight: v->ViewportHeight,
3333 SurfaceWidthY: v->SurfaceWidthY,
3334 SurfaceWidthC: v->SurfaceWidthC,
3335 SurfaceHeightY: v->SurfaceHeightY,
3336 SurfaceHeightC: v->SurfaceHeightC,
3337 Read256BytesBlockHeightY,
3338 Read256BytesBlockHeightC,
3339 Read256BytesBlockWidthY,
3340 Read256BytesBlockWidthC,
3341 ODMCombineEnabled: v->ODMCombineEnabled,
3342 BlendingAndTiming: v->BlendingAndTiming,
3343 BytePerPixY,
3344 BytePerPixC,
3345 BytePerPixDETY,
3346 BytePerPixDETC,
3347 HActive: v->HActive,
3348 HRatio: v->HRatio,
3349 HRatioChroma: v->HRatioChroma,
3350 DPPPerPlane: v->DPPPerPlane,
3351 swath_width_luma_ub: dummy5,
3352 swath_width_chroma_ub: dummy6,
3353 SwathWidth: dummy3,
3354 SwathWidthChroma: dummy4,
3355 SwathHeightY: v->SwathHeightY,
3356 SwathHeightC: v->SwathHeightC,
3357 DETBufferSizeY: v->DETBufferSizeY,
3358 DETBufferSizeC: v->DETBufferSizeC,
3359 ViewportSizeSupportPerPlane: dummy7,
3360 ViewportSizeSupport: &dummysinglestring);
3361}
3362
3363static bool CalculateBytePerPixelAnd256BBlockSizes(
3364 enum source_format_class SourcePixelFormat,
3365 enum dm_swizzle_mode SurfaceTiling,
3366 unsigned int *BytePerPixelY,
3367 unsigned int *BytePerPixelC,
3368 double *BytePerPixelDETY,
3369 double *BytePerPixelDETC,
3370 unsigned int *BlockHeight256BytesY,
3371 unsigned int *BlockHeight256BytesC,
3372 unsigned int *BlockWidth256BytesY,
3373 unsigned int *BlockWidth256BytesC)
3374{
3375 if (SourcePixelFormat == dm_444_64) {
3376 *BytePerPixelDETY = 8;
3377 *BytePerPixelDETC = 0;
3378 *BytePerPixelY = 8;
3379 *BytePerPixelC = 0;
3380 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
3381 *BytePerPixelDETY = 4;
3382 *BytePerPixelDETC = 0;
3383 *BytePerPixelY = 4;
3384 *BytePerPixelC = 0;
3385 } else if (SourcePixelFormat == dm_444_16) {
3386 *BytePerPixelDETY = 2;
3387 *BytePerPixelDETC = 0;
3388 *BytePerPixelY = 2;
3389 *BytePerPixelC = 0;
3390 } else if (SourcePixelFormat == dm_444_8) {
3391 *BytePerPixelDETY = 1;
3392 *BytePerPixelDETC = 0;
3393 *BytePerPixelY = 1;
3394 *BytePerPixelC = 0;
3395 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3396 *BytePerPixelDETY = 4;
3397 *BytePerPixelDETC = 1;
3398 *BytePerPixelY = 4;
3399 *BytePerPixelC = 1;
3400 } else if (SourcePixelFormat == dm_420_8) {
3401 *BytePerPixelDETY = 1;
3402 *BytePerPixelDETC = 2;
3403 *BytePerPixelY = 1;
3404 *BytePerPixelC = 2;
3405 } else if (SourcePixelFormat == dm_420_12) {
3406 *BytePerPixelDETY = 2;
3407 *BytePerPixelDETC = 4;
3408 *BytePerPixelY = 2;
3409 *BytePerPixelC = 4;
3410 } else {
3411 *BytePerPixelDETY = 4.0 / 3;
3412 *BytePerPixelDETC = 8.0 / 3;
3413 *BytePerPixelY = 2;
3414 *BytePerPixelC = 4;
3415 }
3416
3417 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16
3418 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) {
3419 if (SurfaceTiling == dm_sw_linear) {
3420 *BlockHeight256BytesY = 1;
3421 } else if (SourcePixelFormat == dm_444_64) {
3422 *BlockHeight256BytesY = 4;
3423 } else if (SourcePixelFormat == dm_444_8) {
3424 *BlockHeight256BytesY = 16;
3425 } else {
3426 *BlockHeight256BytesY = 8;
3427 }
3428 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3429 *BlockHeight256BytesC = 0;
3430 *BlockWidth256BytesC = 0;
3431 } else {
3432 if (SurfaceTiling == dm_sw_linear) {
3433 *BlockHeight256BytesY = 1;
3434 *BlockHeight256BytesC = 1;
3435 } else if (SourcePixelFormat == dm_rgbe_alpha) {
3436 *BlockHeight256BytesY = 8;
3437 *BlockHeight256BytesC = 16;
3438 } else if (SourcePixelFormat == dm_420_8) {
3439 *BlockHeight256BytesY = 16;
3440 *BlockHeight256BytesC = 8;
3441 } else {
3442 *BlockHeight256BytesY = 8;
3443 *BlockHeight256BytesC = 8;
3444 }
3445 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
3446 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
3447 }
3448 return true;
3449}
3450
/*
 * Select the wait time for the given prefetch mode.
 *
 *   mode 0     : max(DRAMClockChangeLatency + UrgentLatency,
 *                    SREnterPlusExitTime, UrgentLatency)
 *   mode 1     : max(SREnterPlusExitTime, UrgentLatency)
 *   any other  : UrgentLatency
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3461
3462double dml314_CalculateWriteBackDISPCLK(
3463 enum source_format_class WritebackPixelFormat,
3464 double PixelClock,
3465 double WritebackHRatio,
3466 double WritebackVRatio,
3467 unsigned int WritebackHTaps,
3468 unsigned int WritebackVTaps,
3469 long WritebackSourceWidth,
3470 long WritebackDestinationWidth,
3471 unsigned int HTotal,
3472 unsigned int WritebackLineBufferSize)
3473{
3474 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3475
3476 DISPCLK_H = PixelClock * dml_ceil(a: WritebackHTaps / 8.0, granularity: 1) / WritebackHRatio;
3477 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(a: WritebackDestinationWidth / 6.0, granularity: 1) + 8.0) / HTotal;
3478 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3479 return dml_max3(a: DISPCLK_H, b: DISPCLK_V, c: DISPCLK_HB);
3480}
3481
3482static double CalculateWriteBackDelay(
3483 enum source_format_class WritebackPixelFormat,
3484 double WritebackHRatio,
3485 double WritebackVRatio,
3486 unsigned int WritebackVTaps,
3487 int WritebackDestinationWidth,
3488 int WritebackDestinationHeight,
3489 int WritebackSourceHeight,
3490 unsigned int HTotal)
3491{
3492 double CalculateWriteBackDelay;
3493 double Line_length;
3494 double Output_lines_last_notclamped;
3495 double WritebackVInit;
3496
3497 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3498 Line_length = dml_max(a: (double) WritebackDestinationWidth, b: dml_ceil(a: WritebackDestinationWidth / 6.0, granularity: 1) * WritebackVTaps);
3499 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil(a: (WritebackSourceHeight - WritebackVInit) / WritebackVRatio, granularity: 1);
3500 if (Output_lines_last_notclamped < 0) {
3501 CalculateWriteBackDelay = 0;
3502 } else {
3503 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3504 }
3505 return CalculateWriteBackDelay;
3506}
3507
/*
 * Compute the VUPDATE / VREADY pixel quantities and the dynamic-metadata
 * timing terms from the supplied clocks and timing.
 *
 * Outputs:
 *   *VUpdateWidthPix   (14/DCFClkDeepSleep + 12/DPPCLK + repeater delay)
 *                      * PixelClock, passed through dml_ceil(.., 1.0)
 *   *VReadyOffsetPix   max(150/DPPCLK,
 *                          repeater delay + 20/DCFClkDeepSleep + 10/DPPCLK)
 *                      * PixelClock, passed through dml_ceil(.., 1.0)
 *   *VUpdateOffsetPix  dml_ceil(HTotal / 4.0, 1)
 *   *TSetup            sum of the three values above divided by PixelClock
 *   *Tdmbf             DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec             HTotal / PixelClock
 *   *Tdmsks            VBlank * HTotal / PixelClock / 2 when
 *                      DynamicMetadataLinesBeforeActiveRequired is 0,
 *                      otherwise that many lines times HTotal / PixelClock;
 *                      halved when InterlaceEnable == 1 and
 *                      ProgressiveToInterlaceUnitInOPP is false.
 *
 * "repeater delay" is MaxInterDCNTileRepeaters * (2/DPPCLK + 3/DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f; only VUpdateOffsetPix is an int. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3551
3552static void CalculateRowBandwidth(
3553 bool GPUVMEnable,
3554 enum source_format_class SourcePixelFormat,
3555 double VRatio,
3556 double VRatioChroma,
3557 bool DCCEnable,
3558 double LineTime,
3559 unsigned int MetaRowByteLuma,
3560 unsigned int MetaRowByteChroma,
3561 unsigned int meta_row_height_luma,
3562 unsigned int meta_row_height_chroma,
3563 unsigned int PixelPTEBytesPerRowLuma,
3564 unsigned int PixelPTEBytesPerRowChroma,
3565 unsigned int dpte_row_height_luma,
3566 unsigned int dpte_row_height_chroma,
3567 double *meta_row_bw,
3568 double *dpte_row_bw)
3569{
3570 if (DCCEnable != true) {
3571 *meta_row_bw = 0;
3572 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3573 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3574 } else {
3575 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3576 }
3577
3578 if (GPUVMEnable != true) {
3579 *dpte_row_bw = 0;
3580 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3581 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3582 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3583 } else {
3584 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3585 }
3586}
3587
3588static void CalculateFlipSchedule(
3589 struct display_mode_lib *mode_lib,
3590 unsigned int k,
3591 double HostVMInefficiencyFactor,
3592 double UrgentExtraLatency,
3593 double UrgentLatency,
3594 double PDEAndMetaPTEBytesPerFrame,
3595 double MetaRowBytes,
3596 double DPTEBytesPerRow)
3597{
3598 struct vba_vars_st *v = &mode_lib->vba;
3599 double min_row_time = 0.0;
3600 unsigned int HostVMDynamicLevelsTrips;
3601 double TimeForFetchingMetaPTEImmediateFlip;
3602 double TimeForFetchingRowInVBlankImmediateFlip;
3603 double ImmediateFlipBW = 1.0;
3604 double LineTime = v->HTotal[k] / v->PixelClock[k];
3605
3606 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3607 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3608 } else {
3609 HostVMDynamicLevelsTrips = 0;
3610 }
3611
3612 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3613 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3614 }
3615
3616 if (v->GPUVMEnable == true) {
3617 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3618 a: v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3619 b: UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3620 c: LineTime / 4.0);
3621 } else {
3622 TimeForFetchingMetaPTEImmediateFlip = 0;
3623 }
3624
3625 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(a: 4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), granularity: 1) / 4.0;
3626 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3627 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3628 a: (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3629 b: UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3630 c: LineTime / 4);
3631 } else {
3632 TimeForFetchingRowInVBlankImmediateFlip = 0;
3633 }
3634
3635 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(a: 4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), granularity: 1) / 4.0;
3636
3637 if (v->GPUVMEnable == true) {
3638 v->final_flip_bw[k] = dml_max(
3639 a: PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3640 b: (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3641 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3642 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3643 } else {
3644 v->final_flip_bw[k] = 0;
3645 }
3646
3647 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3648 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3649 min_row_time = dml_min(a: v->dpte_row_height[k] * LineTime / v->VRatio[k], b: v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3650 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3651 min_row_time = dml_min(a: v->meta_row_height[k] * LineTime / v->VRatio[k], b: v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3652 } else {
3653 min_row_time = dml_min4(
3654 a: v->dpte_row_height[k] * LineTime / v->VRatio[k],
3655 b: v->meta_row_height[k] * LineTime / v->VRatio[k],
3656 c: v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3657 d: v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3658 }
3659 } else {
3660 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3661 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3662 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3663 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3664 } else {
3665 min_row_time = dml_min(a: v->dpte_row_height[k] * LineTime / v->VRatio[k], b: v->meta_row_height[k] * LineTime / v->VRatio[k]);
3666 }
3667 }
3668
3669 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3670 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3671 v->ImmediateFlipSupportedForPipe[k] = false;
3672 } else {
3673 v->ImmediateFlipSupportedForPipe[k] = true;
3674 }
3675
3676#ifdef __DML_VBA_DEBUG__
3677 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3678 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3679 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3680 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3681 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3682 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3683#endif
3684
3685}
3686
3687static double TruncToValidBPP(
3688 double LinkBitRate,
3689 int Lanes,
3690 int HTotal,
3691 int HActive,
3692 double PixelClock,
3693 double DesiredBPP,
3694 bool DSCEnable,
3695 enum output_encoder_class Output,
3696 enum output_format_class Format,
3697 unsigned int DSCInputBitPerComponent,
3698 int DSCSlices,
3699 int AudioRate,
3700 int AudioLayout,
3701 enum odm_combine_mode ODMCombine)
3702{
3703 double MaxLinkBPP;
3704 int MinDSCBPP;
3705 double MaxDSCBPP;
3706 int NonDSCBPP0;
3707 int NonDSCBPP1;
3708 int NonDSCBPP2;
3709
3710 if (Format == dm_420) {
3711 NonDSCBPP0 = 12;
3712 NonDSCBPP1 = 15;
3713 NonDSCBPP2 = 18;
3714 MinDSCBPP = 6;
3715 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3716 } else if (Format == dm_444) {
3717 NonDSCBPP0 = 24;
3718 NonDSCBPP1 = 30;
3719 NonDSCBPP2 = 36;
3720 MinDSCBPP = 8;
3721 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3722 } else {
3723
3724 NonDSCBPP0 = 16;
3725 NonDSCBPP1 = 20;
3726 NonDSCBPP2 = 24;
3727
3728 if (Format == dm_n422) {
3729 MinDSCBPP = 7;
3730 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3731 } else {
3732 MinDSCBPP = 8;
3733 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3734 }
3735 }
3736
3737 if (DSCEnable && Output == dm_dp) {
3738 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3739 } else {
3740 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3741 }
3742
3743 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3744 MaxLinkBPP = 16;
3745 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3746 MaxLinkBPP = 32;
3747 }
3748
3749 if (DesiredBPP == 0) {
3750 if (DSCEnable) {
3751 if (MaxLinkBPP < MinDSCBPP) {
3752 return BPP_INVALID;
3753 } else if (MaxLinkBPP >= MaxDSCBPP) {
3754 return MaxDSCBPP;
3755 } else {
3756 return dml_floor(a: 16.0 * MaxLinkBPP, granularity: 1.0) / 16.0;
3757 }
3758 } else {
3759 if (MaxLinkBPP >= NonDSCBPP2) {
3760 return NonDSCBPP2;
3761 } else if (MaxLinkBPP >= NonDSCBPP1) {
3762 return NonDSCBPP1;
3763 } else if (MaxLinkBPP >= NonDSCBPP0) {
3764 return 16.0;
3765 } else {
3766 return BPP_INVALID;
3767 }
3768 }
3769 } else {
3770 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3771 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3772 return BPP_INVALID;
3773 } else {
3774 return DesiredBPP;
3775 }
3776 }
3777}
3778
3779static noinline void CalculatePrefetchSchedulePerPlane(
3780 struct display_mode_lib *mode_lib,
3781 double HostVMInefficiencyFactor,
3782 int i,
3783 unsigned int j,
3784 unsigned int k)
3785{
3786 struct vba_vars_st *v = &mode_lib->vba;
3787 Pipe myPipe;
3788
3789 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3790 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3791 myPipe.PixelClock = v->PixelClock[k];
3792 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3793 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3794 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3795 myPipe.VRatio = mode_lib->vba.VRatio[k];
3796 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3797
3798 myPipe.SourceScan = v->SourceScan[k];
3799 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3800 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3801 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3802 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3803 myPipe.InterlaceEnable = v->Interlace[k];
3804 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3805 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3806 myPipe.HTotal = v->HTotal[k];
3807 myPipe.DCCEnable = v->DCCEnable[k];
3808 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3809 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3810 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3811 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3812 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3813 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3814 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3815 mode_lib,
3816 HostVMInefficiencyFactor,
3817 myPipe: &myPipe,
3818 DSCDelay: v->DSCDelayPerState[i][k],
3819 DPPCLKDelaySubtotalPlusCNVCFormater: v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3820 DPPCLKDelaySCL: v->DPPCLKDelaySCL,
3821 DPPCLKDelaySCLLBOnly: v->DPPCLKDelaySCLLBOnly,
3822 DPPCLKDelayCNVCCursor: v->DPPCLKDelayCNVCCursor,
3823 DISPCLKDelaySubtotal: v->DISPCLKDelaySubtotal,
3824 DPP_RECOUT_WIDTH: v->SwathWidthYThisState[k] / v->HRatio[k],
3825 OutputFormat: v->OutputFormat[k],
3826 MaxInterDCNTileRepeaters: v->MaxInterDCNTileRepeaters,
3827 VStartup: dml_min(a: v->MaxVStartup, b: v->MaximumVStartup[i][j][k]),
3828 MaxVStartup: v->MaximumVStartup[i][j][k],
3829 GPUVMPageTableLevels: v->GPUVMMaxPageTableLevels,
3830 GPUVMEnable: v->GPUVMEnable,
3831 HostVMEnable: v->HostVMEnable,
3832 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
3833 HostVMMinPageSize: v->HostVMMinPageSize,
3834 DynamicMetadataEnable: v->DynamicMetadataEnable[k],
3835 DynamicMetadataVMEnabled: v->DynamicMetadataVMEnabled,
3836 DynamicMetadataLinesBeforeActiveRequired: v->DynamicMetadataLinesBeforeActiveRequired[k],
3837 DynamicMetadataTransmittedBytes: v->DynamicMetadataTransmittedBytes[k],
3838 UrgentLatency: v->UrgLatency[i],
3839 UrgentExtraLatency: v->ExtraLatency,
3840 TCalc: v->TimeCalc,
3841 PDEAndMetaPTEBytesFrame: v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3842 MetaRowByte: v->MetaRowBytes[i][j][k],
3843 PixelPTEBytesPerRow: v->DPTEBytesPerRow[i][j][k],
3844 PrefetchSourceLinesY: v->PrefetchLinesY[i][j][k],
3845 SwathWidthY: v->SwathWidthYThisState[k],
3846 VInitPreFillY: v->PrefillY[k],
3847 MaxNumSwathY: v->MaxNumSwY[k],
3848 PrefetchSourceLinesC: v->PrefetchLinesC[i][j][k],
3849 SwathWidthC: v->SwathWidthCThisState[k],
3850 VInitPreFillC: v->PrefillC[k],
3851 MaxNumSwathC: v->MaxNumSwC[k],
3852 swath_width_luma_ub: v->swath_width_luma_ub_this_state[k],
3853 swath_width_chroma_ub: v->swath_width_chroma_ub_this_state[k],
3854 SwathHeightY: v->SwathHeightYThisState[k],
3855 SwathHeightC: v->SwathHeightCThisState[k],
3856 TWait: v->TWait,
3857 DSTXAfterScaler: &v->DSTXAfterScaler[k],
3858 DSTYAfterScaler: &v->DSTYAfterScaler[k],
3859 DestinationLinesForPrefetch: &v->LineTimesForPrefetch[k],
3860 PrefetchBandwidth: &v->PrefetchBW[k],
3861 DestinationLinesToRequestVMInVBlank: &v->LinesForMetaPTE[k],
3862 DestinationLinesToRequestRowInVBlank: &v->LinesForMetaAndDPTERow[k],
3863 VRatioPrefetchY: &v->VRatioPreY[i][j][k],
3864 VRatioPrefetchC: &v->VRatioPreC[i][j][k],
3865 RequiredPrefetchPixDataBWLuma: &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3866 RequiredPrefetchPixDataBWChroma: &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3867 NotEnoughTimeForDynamicMetadata: &v->NoTimeForDynamicMetadata[i][j][k],
3868 Tno_bw: &v->Tno_bw[k],
3869 prefetch_vmrow_bw: &v->prefetch_vmrow_bw[k],
3870 Tdmdl_vm: &v->dummy7[k],
3871 Tdmdl: &v->dummy8[k],
3872 TSetup: &v->dummy13[k],
3873 VUpdateOffsetPix: &v->VUpdateOffsetPix[k],
3874 VUpdateWidthPix: &v->VUpdateWidthPix[k],
3875 VReadyOffsetPix: &v->VReadyOffsetPix[k]);
3876}
3877
3878void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3879{
3880 struct vba_vars_st *v = &mode_lib->vba;
3881
3882 int i, j;
3883 unsigned int k, m;
3884 int ReorderingBytes;
3885 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3886 bool NoChroma = true;
3887 bool EnoughWritebackUnits = true;
3888 bool P2IWith420 = false;
3889 bool DSCOnlyIfNecessaryWithBPP = false;
3890 bool DSC422NativeNotSupported = false;
3891 double MaxTotalVActiveRDBandwidth;
3892 bool ViewportExceedsSurface = false;
3893 bool FMTBufferExceeded = false;
3894
3895 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3896
3897 CalculateMinAndMaxPrefetchMode(
3898 AllowDRAMSelfRefreshOrDRAMClockChangeInVblank: mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3899 MinPrefetchMode: &MinPrefetchMode, MaxPrefetchMode: &MaxPrefetchMode);
3900
3901 /*Scale Ratio, taps Support Check*/
3902
3903 v->ScaleRatioAndTapsSupport = true;
3904 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3905 if (v->ScalerEnabled[k] == false
3906 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3907 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3908 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3909 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3910 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3911 v->ScaleRatioAndTapsSupport = false;
3912 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3913 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3914 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3915 || v->VRatio[k] > v->vtaps[k]
3916 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3917 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3918 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3919 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3920 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3921 || v->HRatioChroma[k] > v->MaxHSCLRatio
3922 || v->VRatioChroma[k] > v->MaxVSCLRatio
3923 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3924 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3925 v->ScaleRatioAndTapsSupport = false;
3926 }
3927 }
3928 /*Source Format, Pixel Format and Scan Support Check*/
3929
3930 v->SourceFormatPixelAndScanSupport = true;
3931 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3932 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) {
3933 v->SourceFormatPixelAndScanSupport = false;
3934 }
3935 }
3936 /*Bandwidth Support Check*/
3937
3938 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3939 CalculateBytePerPixelAnd256BBlockSizes(
3940 SourcePixelFormat: v->SourcePixelFormat[k],
3941 SurfaceTiling: v->SurfaceTiling[k],
3942 BytePerPixelY: &v->BytePerPixelY[k],
3943 BytePerPixelC: &v->BytePerPixelC[k],
3944 BytePerPixelDETY: &v->BytePerPixelInDETY[k],
3945 BytePerPixelDETC: &v->BytePerPixelInDETC[k],
3946 BlockHeight256BytesY: &v->Read256BlockHeightY[k],
3947 BlockHeight256BytesC: &v->Read256BlockHeightC[k],
3948 BlockWidth256BytesY: &v->Read256BlockWidthY[k],
3949 BlockWidth256BytesC: &v->Read256BlockWidthC[k]);
3950 }
3951 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3952 if (v->SourceScan[k] != dm_vert) {
3953 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3954 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3955 } else {
3956 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3957 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3958 }
3959 }
3960 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3961 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(a: v->BytePerPixelInDETY[k], granularity: 1.0)
3962 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3963 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(a: v->BytePerPixelInDETC[k], granularity: 2.0)
3964 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3965 }
3966 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3967 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3968 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3969 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3970 } else if (v->WritebackEnable[k] == true) {
3971 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3972 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3973 } else {
3974 v->WriteBandwidth[k] = 0.0;
3975 }
3976 }
3977
3978 /*Writeback Latency support check*/
3979
3980 v->WritebackLatencySupport = true;
3981 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3982 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3983 v->WritebackLatencySupport = false;
3984 }
3985 }
3986
3987 /*Writeback Mode Support Check*/
3988
3989 v->TotalNumberOfActiveWriteback = 0;
3990 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3991 if (v->WritebackEnable[k] == true) {
3992 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3993 }
3994 }
3995
3996 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3997 EnoughWritebackUnits = false;
3998 }
3999
4000 /*Writeback Scale Ratio and Taps Support Check*/
4001
4002 v->WritebackScaleRatioAndTapsSupport = true;
4003 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4004 if (v->WritebackEnable[k] == true) {
4005 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
4006 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
4007 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
4008 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
4009 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
4010 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
4011 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
4012 v->WritebackScaleRatioAndTapsSupport = false;
4013 }
4014 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
4015 v->WritebackScaleRatioAndTapsSupport = false;
4016 }
4017 }
4018 }
4019 /*Maximum DISPCLK/DPPCLK Support check*/
4020
4021 v->WritebackRequiredDISPCLK = 0.0;
4022 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4023 if (v->WritebackEnable[k] == true) {
4024 v->WritebackRequiredDISPCLK = dml_max(
4025 a: v->WritebackRequiredDISPCLK,
4026 b: dml314_CalculateWriteBackDISPCLK(
4027 WritebackPixelFormat: v->WritebackPixelFormat[k],
4028 PixelClock: v->PixelClock[k],
4029 WritebackHRatio: v->WritebackHRatio[k],
4030 WritebackVRatio: v->WritebackVRatio[k],
4031 WritebackHTaps: v->WritebackHTaps[k],
4032 WritebackVTaps: v->WritebackVTaps[k],
4033 WritebackSourceWidth: v->WritebackSourceWidth[k],
4034 WritebackDestinationWidth: v->WritebackDestinationWidth[k],
4035 HTotal: v->HTotal[k],
4036 WritebackLineBufferSize: v->WritebackLineBufferSize));
4037 }
4038 }
4039 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4040 if (v->HRatio[k] > 1.0) {
4041 v->PSCL_FACTOR[k] = dml_min(
4042 a: v->MaxDCHUBToPSCLThroughput,
4043 b: v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(a: v->htaps[k] / 6.0, granularity: 1.0));
4044 } else {
4045 v->PSCL_FACTOR[k] = dml_min(a: v->MaxDCHUBToPSCLThroughput, b: v->MaxPSCLToLBThroughput);
4046 }
4047 if (v->BytePerPixelC[k] == 0.0) {
4048 v->PSCL_FACTOR_CHROMA[k] = 0.0;
4049 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4050 * dml_max3(
4051 a: v->vtaps[k] / 6.0 * dml_min(a: 1.0, b: v->HRatio[k]),
4052 b: v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4053 c: 1.0);
4054 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4055 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4056 }
4057 } else {
4058 if (v->HRatioChroma[k] > 1.0) {
4059 v->PSCL_FACTOR_CHROMA[k] = dml_min(
4060 a: v->MaxDCHUBToPSCLThroughput,
4061 b: v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(a: v->HTAPsChroma[k] / 6.0, granularity: 1.0));
4062 } else {
4063 v->PSCL_FACTOR_CHROMA[k] = dml_min(a: v->MaxDCHUBToPSCLThroughput, b: v->MaxPSCLToLBThroughput);
4064 }
4065 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
4066 * dml_max5(
4067 a: v->vtaps[k] / 6.0 * dml_min(a: 1.0, b: v->HRatio[k]),
4068 b: v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
4069 c: v->VTAPsChroma[k] / 6.0 * dml_min(a: 1.0, b: v->HRatioChroma[k]),
4070 d: v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
4071 e: 1.0);
4072 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
4073 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
4074 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
4075 }
4076 }
4077 }
4078 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4079 int MaximumSwathWidthSupportLuma;
4080 int MaximumSwathWidthSupportChroma;
4081
4082 if (v->SurfaceTiling[k] == dm_sw_linear) {
4083 MaximumSwathWidthSupportLuma = 8192.0;
4084 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
4085 MaximumSwathWidthSupportLuma = 2880.0;
4086 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4087 MaximumSwathWidthSupportLuma = 3840.0;
4088 } else {
4089 MaximumSwathWidthSupportLuma = 5760.0;
4090 }
4091
4092 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4093 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4094 } else {
4095 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4096 }
4097 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(a: v->HRatio[k], b: 1.0) / v->LBBitPerPixel[k]
4098 / (v->vtaps[k] + dml_max(a: dml_ceil(a: v->VRatio[k], granularity: 1.0) - 2, b: 0.0));
4099 if (v->BytePerPixelC[k] == 0.0) {
4100 v->MaximumSwathWidthInLineBufferChroma = 0;
4101 } else {
4102 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(a: v->HRatioChroma[k], b: 1.0) / v->LBBitPerPixel[k]
4103 / (v->VTAPsChroma[k] + dml_max(a: dml_ceil(a: v->VRatioChroma[k], granularity: 1.0) - 2, b: 0.0));
4104 }
4105 v->MaximumSwathWidthLuma[k] = dml_min(a: MaximumSwathWidthSupportLuma, b: v->MaximumSwathWidthInLineBufferLuma);
4106 v->MaximumSwathWidthChroma[k] = dml_min(a: MaximumSwathWidthSupportChroma, b: v->MaximumSwathWidthInLineBufferChroma);
4107 }
4108
4109 CalculateSwathAndDETConfiguration(
4110 ForceSingleDPP: true,
4111 NumberOfActivePlanes: v->NumberOfActivePlanes,
4112 DETBufferSizeInKByte: v->DETBufferSizeInKByte[0],
4113 MaximumSwathWidthLuma: v->MaximumSwathWidthLuma,
4114 MaximumSwathWidthChroma: v->MaximumSwathWidthChroma,
4115 SourceScan: v->SourceScan,
4116 SourcePixelFormat: v->SourcePixelFormat,
4117 SurfaceTiling: v->SurfaceTiling,
4118 ViewportWidth: v->ViewportWidth,
4119 ViewportHeight: v->ViewportHeight,
4120 SurfaceWidthY: v->SurfaceWidthY,
4121 SurfaceWidthC: v->SurfaceWidthC,
4122 SurfaceHeightY: v->SurfaceHeightY,
4123 SurfaceHeightC: v->SurfaceHeightC,
4124 Read256BytesBlockHeightY: v->Read256BlockHeightY,
4125 Read256BytesBlockHeightC: v->Read256BlockHeightC,
4126 Read256BytesBlockWidthY: v->Read256BlockWidthY,
4127 Read256BytesBlockWidthC: v->Read256BlockWidthC,
4128 ODMCombineEnabled: v->odm_combine_dummy,
4129 BlendingAndTiming: v->BlendingAndTiming,
4130 BytePerPixY: v->BytePerPixelY,
4131 BytePerPixC: v->BytePerPixelC,
4132 BytePerPixDETY: v->BytePerPixelInDETY,
4133 BytePerPixDETC: v->BytePerPixelInDETC,
4134 HActive: v->HActive,
4135 HRatio: v->HRatio,
4136 HRatioChroma: v->HRatioChroma,
4137 DPPPerPlane: v->NoOfDPPThisState,
4138 swath_width_luma_ub: v->swath_width_luma_ub_this_state,
4139 swath_width_chroma_ub: v->swath_width_chroma_ub_this_state,
4140 SwathWidth: v->SwathWidthYThisState,
4141 SwathWidthChroma: v->SwathWidthCThisState,
4142 SwathHeightY: v->SwathHeightYThisState,
4143 SwathHeightC: v->SwathHeightCThisState,
4144 DETBufferSizeY: v->DETBufferSizeYThisState,
4145 DETBufferSizeC: v->DETBufferSizeCThisState,
4146 ViewportSizeSupportPerPlane: v->SingleDPPViewportSizeSupportPerPlane,
4147 ViewportSizeSupport: &v->ViewportSizeSupport[0][0]);
4148
4149 for (i = 0; i < v->soc.num_states; i++) {
4150 for (j = 0; j < 2; j++) {
4151 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(Clock: v->MaxDispclk[i], VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
4152 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(Clock: v->MaxDppclk[i], VCOSpeed: v->DISPCLKDPPCLKVCOSpeed);
4153 v->RequiredDISPCLK[i][j] = 0.0;
4154 v->DISPCLK_DPPCLK_Support[i][j] = true;
4155 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4156 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4157 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4158 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4159 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4160 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4161 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4162 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4163 }
4164 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4165 * (1 + v->DISPCLKRampingMargin / 100.0);
4166 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4167 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4168 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4169 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4170 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4171 }
4172 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4173 * (1 + v->DISPCLKRampingMargin / 100.0);
4174 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4175 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4176 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4177 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4178 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4179 }
4180
4181 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4182 || !(v->Output[k] == dm_dp ||
4183 v->Output[k] == dm_dp2p0 ||
4184 v->Output[k] == dm_edp)) {
4185 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4186 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4187
4188 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4189 FMTBufferExceeded = true;
4190 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4191 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4192 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4193 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4194 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4195 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4196 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4197 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4198 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4199 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4200 } else {
4201 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4202 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4203 }
4204 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH
4205 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4206 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) {
4207 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4208 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4209 } else {
4210 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4211 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4212 }
4213 }
4214 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH
4215 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4216 if (v->Output[k] == dm_hdmi) {
4217 FMTBufferExceeded = true;
4218 } else if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) {
4219 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4220 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4221
4222 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH)
4223 FMTBufferExceeded = true;
4224 } else {
4225 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4226 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4227 }
4228 }
4229 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4230 v->MPCCombine[i][j][k] = false;
4231 v->NoOfDPP[i][j][k] = 4;
4232 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4233 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4234 v->MPCCombine[i][j][k] = false;
4235 v->NoOfDPP[i][j][k] = 2;
4236 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4237 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4238 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4239 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4240 v->MPCCombine[i][j][k] = false;
4241 v->NoOfDPP[i][j][k] = 1;
4242 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4243 } else {
4244 v->MPCCombine[i][j][k] = true;
4245 v->NoOfDPP[i][j][k] = 2;
4246 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4247 }
4248 v->RequiredDISPCLK[i][j] = dml_max(a: v->RequiredDISPCLK[i][j], b: v->PlaneRequiredDISPCLK);
4249 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4250 > v->MaxDppclkRoundedDownToDFSGranularity)
4251 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4252 v->DISPCLK_DPPCLK_Support[i][j] = false;
4253 }
4254 }
4255 v->TotalNumberOfActiveDPP[i][j] = 0;
4256 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4257 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4258 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4259 if (v->NoOfDPP[i][j][k] == 1)
4260 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4261 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4262 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4263 NoChroma = false;
4264 }
4265
4266 // UPTO
4267 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4268 && !UnboundedRequest(UseUnboundedRequestingFinal: v->UseUnboundedRequesting, TotalNumberOfActiveDPP: v->TotalNumberOfActiveDPP[i][j], NoChroma, Output: v->Output[0])) {
4269 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4270 double BWOfNonSplitPlaneOfMaximumBandwidth;
4271 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4272
4273 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4274 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4275 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4276 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4277 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4278 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4279 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4280 }
4281 }
4282 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4283 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4284 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4285 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4286 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4287 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4288 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4289 }
4290 }
4291 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4292 v->RequiredDISPCLK[i][j] = 0.0;
4293 v->DISPCLK_DPPCLK_Support[i][j] = true;
4294 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4295 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4296 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4297 v->MPCCombine[i][j][k] = true;
4298 v->NoOfDPP[i][j][k] = 2;
4299 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4300 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4301 } else {
4302 v->MPCCombine[i][j][k] = false;
4303 v->NoOfDPP[i][j][k] = 1;
4304 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4305 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4306 }
4307 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4308 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4309 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4310 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4311 } else {
4312 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4313 }
4314 v->RequiredDISPCLK[i][j] = dml_max(a: v->RequiredDISPCLK[i][j], b: v->PlaneRequiredDISPCLK);
4315 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4316 > v->MaxDppclkRoundedDownToDFSGranularity)
4317 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4318 v->DISPCLK_DPPCLK_Support[i][j] = false;
4319 }
4320 }
4321 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4322 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4323 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4324 }
4325 }
4326 v->RequiredDISPCLK[i][j] = dml_max(a: v->RequiredDISPCLK[i][j], b: v->WritebackRequiredDISPCLK);
4327 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4328 v->DISPCLK_DPPCLK_Support[i][j] = false;
4329 }
4330 }
4331 }
4332
4333 /*Total Available Pipes Support Check*/
4334
4335 for (i = 0; i < v->soc.num_states; i++) {
4336 for (j = 0; j < 2; j++) {
4337 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4338 v->TotalAvailablePipesSupport[i][j] = true;
4339 } else {
4340 v->TotalAvailablePipesSupport[i][j] = false;
4341 }
4342 }
4343 }
4344 /*Display IO and DSC Support Check*/
4345
4346 v->NonsupportedDSCInputBPC = false;
4347 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4348 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4349 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4350 v->NonsupportedDSCInputBPC = true;
4351 }
4352 }
4353
4354 /*Number Of DSC Slices*/
4355 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4356 if (v->BlendingAndTiming[k] == k) {
4357 if (v->PixelClockBackEnd[k] > 3200) {
4358 v->NumberOfDSCSlices[k] = dml_ceil(a: v->PixelClockBackEnd[k] / 400.0, granularity: 4.0);
4359 } else if (v->PixelClockBackEnd[k] > 1360) {
4360 v->NumberOfDSCSlices[k] = 8;
4361 } else if (v->PixelClockBackEnd[k] > 680) {
4362 v->NumberOfDSCSlices[k] = 4;
4363 } else if (v->PixelClockBackEnd[k] > 340) {
4364 v->NumberOfDSCSlices[k] = 2;
4365 } else {
4366 v->NumberOfDSCSlices[k] = 1;
4367 }
4368 } else {
4369 v->NumberOfDSCSlices[k] = 0;
4370 }
4371 }
4372
4373 for (i = 0; i < v->soc.num_states; i++) {
4374 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4375 v->RequiresDSC[i][k] = false;
4376 v->RequiresFEC[i][k] = false;
4377 if (v->BlendingAndTiming[k] == k) {
4378 if (v->Output[k] == dm_hdmi) {
4379 v->RequiresDSC[i][k] = false;
4380 v->RequiresFEC[i][k] = false;
4381 v->OutputBppPerState[i][k] = TruncToValidBPP(
4382 LinkBitRate: dml_min(a: 600.0, b: v->PHYCLKPerState[i]) * 10,
4383 Lanes: 3,
4384 HTotal: v->HTotal[k],
4385 HActive: v->HActive[k],
4386 PixelClock: v->PixelClockBackEnd[k],
4387 DesiredBPP: v->ForcedOutputLinkBPP[k],
4388 DSCEnable: false,
4389 Output: v->Output[k],
4390 Format: v->OutputFormat[k],
4391 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4392 DSCSlices: v->NumberOfDSCSlices[k],
4393 AudioRate: v->AudioSampleRate[k],
4394 AudioLayout: v->AudioSampleLayout[k],
4395 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4396 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4397 if (v->DSCEnable[k] == true) {
4398 v->RequiresDSC[i][k] = true;
4399 v->LinkDSCEnable = true;
4400 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4401 v->RequiresFEC[i][k] = true;
4402 } else {
4403 v->RequiresFEC[i][k] = false;
4404 }
4405 } else {
4406 v->RequiresDSC[i][k] = false;
4407 v->LinkDSCEnable = false;
4408 if (v->Output[k] == dm_dp2p0) {
4409 v->RequiresFEC[i][k] = true;
4410 } else {
4411 v->RequiresFEC[i][k] = false;
4412 }
4413 }
4414 if (v->Output[k] == dm_dp2p0) {
4415 v->Outbpp = BPP_INVALID;
4416 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4417 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4418 v->Outbpp = TruncToValidBPP(
4419 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 10000,
4420 Lanes: v->OutputLinkDPLanes[k],
4421 HTotal: v->HTotal[k],
4422 HActive: v->HActive[k],
4423 PixelClock: v->PixelClockBackEnd[k],
4424 DesiredBPP: v->ForcedOutputLinkBPP[k],
4425 DSCEnable: v->LinkDSCEnable,
4426 Output: v->Output[k],
4427 Format: v->OutputFormat[k],
4428 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4429 DSCSlices: v->NumberOfDSCSlices[k],
4430 AudioRate: v->AudioSampleRate[k],
4431 AudioLayout: v->AudioSampleLayout[k],
4432 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4433 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4434 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4435 v->RequiresDSC[i][k] = true;
4436 v->LinkDSCEnable = true;
4437 v->Outbpp = TruncToValidBPP(
4438 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 10000,
4439 Lanes: v->OutputLinkDPLanes[k],
4440 HTotal: v->HTotal[k],
4441 HActive: v->HActive[k],
4442 PixelClock: v->PixelClockBackEnd[k],
4443 DesiredBPP: v->ForcedOutputLinkBPP[k],
4444 DSCEnable: v->LinkDSCEnable,
4445 Output: v->Output[k],
4446 Format: v->OutputFormat[k],
4447 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4448 DSCSlices: v->NumberOfDSCSlices[k],
4449 AudioRate: v->AudioSampleRate[k],
4450 AudioLayout: v->AudioSampleLayout[k],
4451 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4452 }
4453 v->OutputBppPerState[i][k] = v->Outbpp;
4454 // TODO: Need some other way to handle this nonsense
4455 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4456 }
4457 if (v->Outbpp == BPP_INVALID &&
4458 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4459 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4460 v->Outbpp = TruncToValidBPP(
4461 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 13500,
4462 Lanes: v->OutputLinkDPLanes[k],
4463 HTotal: v->HTotal[k],
4464 HActive: v->HActive[k],
4465 PixelClock: v->PixelClockBackEnd[k],
4466 DesiredBPP: v->ForcedOutputLinkBPP[k],
4467 DSCEnable: v->LinkDSCEnable,
4468 Output: v->Output[k],
4469 Format: v->OutputFormat[k],
4470 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4471 DSCSlices: v->NumberOfDSCSlices[k],
4472 AudioRate: v->AudioSampleRate[k],
4473 AudioLayout: v->AudioSampleLayout[k],
4474 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4475 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4476 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4477 v->RequiresDSC[i][k] = true;
4478 v->LinkDSCEnable = true;
4479 v->Outbpp = TruncToValidBPP(
4480 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 13500,
4481 Lanes: v->OutputLinkDPLanes[k],
4482 HTotal: v->HTotal[k],
4483 HActive: v->HActive[k],
4484 PixelClock: v->PixelClockBackEnd[k],
4485 DesiredBPP: v->ForcedOutputLinkBPP[k],
4486 DSCEnable: v->LinkDSCEnable,
4487 Output: v->Output[k],
4488 Format: v->OutputFormat[k],
4489 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4490 DSCSlices: v->NumberOfDSCSlices[k],
4491 AudioRate: v->AudioSampleRate[k],
4492 AudioLayout: v->AudioSampleLayout[k],
4493 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4494 }
4495 v->OutputBppPerState[i][k] = v->Outbpp;
4496 // TODO: Need some other way to handle this nonsense
4497 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4498 }
4499 if (v->Outbpp == BPP_INVALID &&
4500 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4501 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4502 v->Outbpp = TruncToValidBPP(
4503 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 20000,
4504 Lanes: v->OutputLinkDPLanes[k],
4505 HTotal: v->HTotal[k],
4506 HActive: v->HActive[k],
4507 PixelClock: v->PixelClockBackEnd[k],
4508 DesiredBPP: v->ForcedOutputLinkBPP[k],
4509 DSCEnable: v->LinkDSCEnable,
4510 Output: v->Output[k],
4511 Format: v->OutputFormat[k],
4512 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4513 DSCSlices: v->NumberOfDSCSlices[k],
4514 AudioRate: v->AudioSampleRate[k],
4515 AudioLayout: v->AudioSampleLayout[k],
4516 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4517 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4518 v->ForcedOutputLinkBPP[k] == 0) {
4519 v->RequiresDSC[i][k] = true;
4520 v->LinkDSCEnable = true;
4521 v->Outbpp = TruncToValidBPP(
4522 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 20000,
4523 Lanes: v->OutputLinkDPLanes[k],
4524 HTotal: v->HTotal[k],
4525 HActive: v->HActive[k],
4526 PixelClock: v->PixelClockBackEnd[k],
4527 DesiredBPP: v->ForcedOutputLinkBPP[k],
4528 DSCEnable: v->LinkDSCEnable,
4529 Output: v->Output[k],
4530 Format: v->OutputFormat[k],
4531 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4532 DSCSlices: v->NumberOfDSCSlices[k],
4533 AudioRate: v->AudioSampleRate[k],
4534 AudioLayout: v->AudioSampleLayout[k],
4535 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4536 }
4537 v->OutputBppPerState[i][k] = v->Outbpp;
4538 // TODO: Need some other way to handle this nonsense
4539 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4540 }
4541 } else {
4542 v->Outbpp = BPP_INVALID;
4543 if (v->PHYCLKPerState[i] >= 270.0) {
4544 v->Outbpp = TruncToValidBPP(
4545 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 2700,
4546 Lanes: v->OutputLinkDPLanes[k],
4547 HTotal: v->HTotal[k],
4548 HActive: v->HActive[k],
4549 PixelClock: v->PixelClockBackEnd[k],
4550 DesiredBPP: v->ForcedOutputLinkBPP[k],
4551 DSCEnable: v->LinkDSCEnable,
4552 Output: v->Output[k],
4553 Format: v->OutputFormat[k],
4554 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4555 DSCSlices: v->NumberOfDSCSlices[k],
4556 AudioRate: v->AudioSampleRate[k],
4557 AudioLayout: v->AudioSampleLayout[k],
4558 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4559 v->OutputBppPerState[i][k] = v->Outbpp;
4560 // TODO: Need some other way to handle this nonsense
4561 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4562 }
4563 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4564 v->Outbpp = TruncToValidBPP(
4565 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 5400,
4566 Lanes: v->OutputLinkDPLanes[k],
4567 HTotal: v->HTotal[k],
4568 HActive: v->HActive[k],
4569 PixelClock: v->PixelClockBackEnd[k],
4570 DesiredBPP: v->ForcedOutputLinkBPP[k],
4571 DSCEnable: v->LinkDSCEnable,
4572 Output: v->Output[k],
4573 Format: v->OutputFormat[k],
4574 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4575 DSCSlices: v->NumberOfDSCSlices[k],
4576 AudioRate: v->AudioSampleRate[k],
4577 AudioLayout: v->AudioSampleLayout[k],
4578 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4579 v->OutputBppPerState[i][k] = v->Outbpp;
4580 // TODO: Need some other way to handle this nonsense
4581 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4582 }
4583 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4584 v->Outbpp = TruncToValidBPP(
4585 LinkBitRate: (1.0 - v->Downspreading / 100.0) * 8100,
4586 Lanes: v->OutputLinkDPLanes[k],
4587 HTotal: v->HTotal[k],
4588 HActive: v->HActive[k],
4589 PixelClock: v->PixelClockBackEnd[k],
4590 DesiredBPP: v->ForcedOutputLinkBPP[k],
4591 DSCEnable: v->LinkDSCEnable,
4592 Output: v->Output[k],
4593 Format: v->OutputFormat[k],
4594 DSCInputBitPerComponent: v->DSCInputBitPerComponent[k],
4595 DSCSlices: v->NumberOfDSCSlices[k],
4596 AudioRate: v->AudioSampleRate[k],
4597 AudioLayout: v->AudioSampleLayout[k],
4598 ODMCombine: v->ODMCombineEnablePerState[i][k]);
4599 v->OutputBppPerState[i][k] = v->Outbpp;
4600 // TODO: Need some other way to handle this nonsense
4601 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4602 }
4603 }
4604 }
4605 } else {
4606 v->OutputBppPerState[i][k] = 0;
4607 }
4608 }
4609 }
4610
4611 for (i = 0; i < v->soc.num_states; i++) {
4612 v->LinkCapacitySupport[i] = true;
4613 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4614 if (v->BlendingAndTiming[k] == k
4615 && (v->Output[k] == dm_dp ||
4616 v->Output[k] == dm_edp ||
4617 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4618 v->LinkCapacitySupport[i] = false;
4619 }
4620 }
4621 }
4622
4623 // UPTO 2172
4624 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4625 if (v->BlendingAndTiming[k] == k
4626 && (v->Output[k] == dm_dp ||
4627 v->Output[k] == dm_edp ||
4628 v->Output[k] == dm_hdmi)) {
4629 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4630 P2IWith420 = true;
4631 }
4632 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4633 && !v->DSC422NativeSupport) {
4634 DSC422NativeNotSupported = true;
4635 }
4636 }
4637 }
4638
4639
4640 for (i = 0; i < v->soc.num_states; ++i) {
4641 v->ODMCombine4To1SupportCheckOK[i] = true;
4642 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4643 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4644 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4645 || v->Output[k] == dm_hdmi)) {
4646 v->ODMCombine4To1SupportCheckOK[i] = false;
4647 }
4648 }
4649 }
4650
4651 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4652
4653 for (i = 0; i < v->soc.num_states; i++) {
4654 v->NotEnoughDSCUnits[i] = false;
4655 v->TotalDSCUnitsRequired = 0.0;
4656 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4657 if (v->RequiresDSC[i][k] == true) {
4658 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4659 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4660 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4661 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4662 } else {
4663 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4664 }
4665 }
4666 }
4667 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4668 v->NotEnoughDSCUnits[i] = true;
4669 }
4670 }
4671 /*DSC Delay per state*/
4672
4673 for (i = 0; i < v->soc.num_states; i++) {
4674 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4675 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4676 v->BPP = 0.0;
4677 } else {
4678 v->BPP = v->OutputBppPerState[i][k];
4679 }
4680 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4681 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4682 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4683 bpc: v->DSCInputBitPerComponent[k],
4684 BPP: v->BPP,
4685 sliceWidth: dml_ceil(a: 1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1.0),
4686 numSlices: v->NumberOfDSCSlices[k],
4687 pixelFormat: v->OutputFormat[k],
4688 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]);
4689 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4690 v->DSCDelayPerState[i][k] = 2.0
4691 * (dscceComputeDelay(
4692 bpc: v->DSCInputBitPerComponent[k],
4693 BPP: v->BPP,
4694 sliceWidth: dml_ceil(a: 1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1.0),
4695 numSlices: v->NumberOfDSCSlices[k] / 2,
4696 pixelFormat: v->OutputFormat[k],
4697 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]));
4698 } else {
4699 v->DSCDelayPerState[i][k] = 4.0
4700 * (dscceComputeDelay(
4701 bpc: v->DSCInputBitPerComponent[k],
4702 BPP: v->BPP,
4703 sliceWidth: dml_ceil(a: 1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], granularity: 1.0),
4704 numSlices: v->NumberOfDSCSlices[k] / 4,
4705 pixelFormat: v->OutputFormat[k],
4706 Output: v->Output[k]) + dscComputeDelay(pixelFormat: v->OutputFormat[k], Output: v->Output[k]));
4707 }
4708 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] + (v->HTotal[k] - v->HActive[k]) * dml_ceil(a: (double) v->DSCDelayPerState[i][k] / v->HActive[k], granularity: 1.0);
4709 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4710 } else {
4711 v->DSCDelayPerState[i][k] = 0.0;
4712 }
4713 }
4714 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4715 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4716 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4717 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4718 }
4719 }
4720 }
4721 }
4722
4723 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4724 //
4725 for (i = 0; i < v->soc.num_states; ++i) {
4726 for (j = 0; j <= 1; ++j) {
4727 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4728 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4729 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4730 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4731 }
4732
4733 CalculateSwathAndDETConfiguration(
4734 ForceSingleDPP: false,
4735 NumberOfActivePlanes: v->NumberOfActivePlanes,
4736 DETBufferSizeInKByte: v->DETBufferSizeInKByte[0],
4737 MaximumSwathWidthLuma: v->MaximumSwathWidthLuma,
4738 MaximumSwathWidthChroma: v->MaximumSwathWidthChroma,
4739 SourceScan: v->SourceScan,
4740 SourcePixelFormat: v->SourcePixelFormat,
4741 SurfaceTiling: v->SurfaceTiling,
4742 ViewportWidth: v->ViewportWidth,
4743 ViewportHeight: v->ViewportHeight,
4744 SurfaceWidthY: v->SurfaceWidthY,
4745 SurfaceWidthC: v->SurfaceWidthC,
4746 SurfaceHeightY: v->SurfaceHeightY,
4747 SurfaceHeightC: v->SurfaceHeightC,
4748 Read256BytesBlockHeightY: v->Read256BlockHeightY,
4749 Read256BytesBlockHeightC: v->Read256BlockHeightC,
4750 Read256BytesBlockWidthY: v->Read256BlockWidthY,
4751 Read256BytesBlockWidthC: v->Read256BlockWidthC,
4752 ODMCombineEnabled: v->ODMCombineEnableThisState,
4753 BlendingAndTiming: v->BlendingAndTiming,
4754 BytePerPixY: v->BytePerPixelY,
4755 BytePerPixC: v->BytePerPixelC,
4756 BytePerPixDETY: v->BytePerPixelInDETY,
4757 BytePerPixDETC: v->BytePerPixelInDETC,
4758 HActive: v->HActive,
4759 HRatio: v->HRatio,
4760 HRatioChroma: v->HRatioChroma,
4761 DPPPerPlane: v->NoOfDPPThisState,
4762 swath_width_luma_ub: v->swath_width_luma_ub_this_state,
4763 swath_width_chroma_ub: v->swath_width_chroma_ub_this_state,
4764 SwathWidth: v->SwathWidthYThisState,
4765 SwathWidthChroma: v->SwathWidthCThisState,
4766 SwathHeightY: v->SwathHeightYThisState,
4767 SwathHeightC: v->SwathHeightCThisState,
4768 DETBufferSizeY: v->DETBufferSizeYThisState,
4769 DETBufferSizeC: v->DETBufferSizeCThisState,
4770 ViewportSizeSupportPerPlane: v->dummystring,
4771 ViewportSizeSupport: &v->ViewportSizeSupport[i][j]);
4772
4773 CalculateDCFCLKDeepSleep(
4774 mode_lib,
4775 NumberOfActivePlanes: v->NumberOfActivePlanes,
4776 BytePerPixelY: v->BytePerPixelY,
4777 BytePerPixelC: v->BytePerPixelC,
4778 VRatio: v->VRatio,
4779 VRatioChroma: v->VRatioChroma,
4780 SwathWidthY: v->SwathWidthYThisState,
4781 SwathWidthC: v->SwathWidthCThisState,
4782 DPPPerPlane: v->NoOfDPPThisState,
4783 HRatio: v->HRatio,
4784 HRatioChroma: v->HRatioChroma,
4785 PixelClock: v->PixelClock,
4786 PSCL_THROUGHPUT: v->PSCL_FACTOR,
4787 PSCL_THROUGHPUT_CHROMA: v->PSCL_FACTOR_CHROMA,
4788 DPPCLK: v->RequiredDPPCLKThisState,
4789 ReadBandwidthLuma: v->ReadBandwidthLuma,
4790 ReadBandwidthChroma: v->ReadBandwidthChroma,
4791 ReturnBusWidth: v->ReturnBusWidth,
4792 DCFCLKDeepSleep: &v->ProjectedDCFCLKDeepSleep[i][j]);
4793
4794 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4795 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4796 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4797 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4798 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4799 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4800 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4801 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4802 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4803 }
4804 }
4805 }
4806
4807 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4808 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4809 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4810 }
4811
4812 for (i = 0; i < v->soc.num_states; i++) {
4813 for (j = 0; j < 2; j++) {
4814 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4815
4816 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4817 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4818 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4819 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4820 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4821 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4822 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4823 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4824 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4825 }
4826
4827 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4828 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4829 if (v->DCCEnable[k] == true) {
4830 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4831 }
4832 }
4833
4834 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4835 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4836 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4837
4838 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4839 && v->SourceScan[k] != dm_vert) {
4840 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4841 / 2;
4842 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4843 } else {
4844 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4845 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4846 }
4847
4848 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4849 mode_lib,
4850 DCCEnable: v->DCCEnable[k],
4851 BlockHeight256Bytes: v->Read256BlockHeightC[k],
4852 BlockWidth256Bytes: v->Read256BlockWidthC[k],
4853 SourcePixelFormat: v->SourcePixelFormat[k],
4854 SurfaceTiling: v->SurfaceTiling[k],
4855 BytePerPixel: v->BytePerPixelC[k],
4856 ScanDirection: v->SourceScan[k],
4857 SwathWidth: v->SwathWidthCThisState[k],
4858 ViewportHeight: v->ViewportHeightChroma[k],
4859 GPUVMEnable: v->GPUVMEnable,
4860 HostVMEnable: v->HostVMEnable,
4861 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
4862 GPUVMMinPageSize: v->GPUVMMinPageSize,
4863 HostVMMinPageSize: v->HostVMMinPageSize,
4864 PTEBufferSizeInRequests: v->PTEBufferSizeInRequestsForChroma,
4865 Pitch: v->PitchC[k],
4866 DCCMetaPitch: 0.0,
4867 MacroTileWidth: &v->MacroTileWidthC[k],
4868 MetaRowByte: &v->MetaRowBytesC,
4869 PixelPTEBytesPerRow: &v->DPTEBytesPerRowC,
4870 PTEBufferSizeNotExceeded: &v->PTEBufferSizeNotExceededC[i][j][k],
4871 dpte_row_width_ub: &v->dummyinteger7,
4872 dpte_row_height: &v->dpte_row_height_chroma[k],
4873 MetaRequestWidth: &v->dummyinteger28,
4874 MetaRequestHeight: &v->dummyinteger26,
4875 meta_row_width: &v->dummyinteger23,
4876 meta_row_height: &v->meta_row_height_chroma[k],
4877 vm_group_bytes: &v->dummyinteger8,
4878 dpte_group_bytes: &v->dummyinteger9,
4879 PixelPTEReqWidth: &v->dummyinteger19,
4880 PixelPTEReqHeight: &v->dummyinteger20,
4881 PTERequestSize: &v->dummyinteger17,
4882 DPDE0BytesFrame: &v->dummyinteger10,
4883 MetaPTEBytesFrame: &v->dummyinteger11);
4884
4885 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4886 mode_lib,
4887 VRatio: v->VRatioChroma[k],
4888 vtaps: v->VTAPsChroma[k],
4889 Interlace: v->Interlace[k],
4890 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
4891 SwathHeight: v->SwathHeightCThisState[k],
4892 ViewportYStart: v->ViewportYStartC[k],
4893 VInitPreFill: &v->PrefillC[k],
4894 MaxNumSwath: &v->MaxNumSwC[k]);
4895 } else {
4896 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4897 v->PTEBufferSizeInRequestsForChroma = 0;
4898 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4899 v->MetaRowBytesC = 0.0;
4900 v->DPTEBytesPerRowC = 0.0;
4901 v->PrefetchLinesC[i][j][k] = 0.0;
4902 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4903 }
4904 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4905 mode_lib,
4906 DCCEnable: v->DCCEnable[k],
4907 BlockHeight256Bytes: v->Read256BlockHeightY[k],
4908 BlockWidth256Bytes: v->Read256BlockWidthY[k],
4909 SourcePixelFormat: v->SourcePixelFormat[k],
4910 SurfaceTiling: v->SurfaceTiling[k],
4911 BytePerPixel: v->BytePerPixelY[k],
4912 ScanDirection: v->SourceScan[k],
4913 SwathWidth: v->SwathWidthYThisState[k],
4914 ViewportHeight: v->ViewportHeight[k],
4915 GPUVMEnable: v->GPUVMEnable,
4916 HostVMEnable: v->HostVMEnable,
4917 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels,
4918 GPUVMMinPageSize: v->GPUVMMinPageSize,
4919 HostVMMinPageSize: v->HostVMMinPageSize,
4920 PTEBufferSizeInRequests: v->PTEBufferSizeInRequestsForLuma,
4921 Pitch: v->PitchY[k],
4922 DCCMetaPitch: v->DCCMetaPitchY[k],
4923 MacroTileWidth: &v->MacroTileWidthY[k],
4924 MetaRowByte: &v->MetaRowBytesY,
4925 PixelPTEBytesPerRow: &v->DPTEBytesPerRowY,
4926 PTEBufferSizeNotExceeded: &v->PTEBufferSizeNotExceededY[i][j][k],
4927 dpte_row_width_ub: &v->dummyinteger7,
4928 dpte_row_height: &v->dpte_row_height[k],
4929 MetaRequestWidth: &v->dummyinteger29,
4930 MetaRequestHeight: &v->dummyinteger27,
4931 meta_row_width: &v->dummyinteger24,
4932 meta_row_height: &v->meta_row_height[k],
4933 vm_group_bytes: &v->dummyinteger25,
4934 dpte_group_bytes: &v->dpte_group_bytes[k],
4935 PixelPTEReqWidth: &v->dummyinteger21,
4936 PixelPTEReqHeight: &v->dummyinteger22,
4937 PTERequestSize: &v->dummyinteger18,
4938 DPDE0BytesFrame: &v->dummyinteger5,
4939 MetaPTEBytesFrame: &v->dummyinteger6);
4940 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4941 mode_lib,
4942 VRatio: v->VRatio[k],
4943 vtaps: v->vtaps[k],
4944 Interlace: v->Interlace[k],
4945 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
4946 SwathHeight: v->SwathHeightYThisState[k],
4947 ViewportYStart: v->ViewportYStartY[k],
4948 VInitPreFill: &v->PrefillY[k],
4949 MaxNumSwath: &v->MaxNumSwY[k]);
4950 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4951 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4952 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4953
4954 CalculateRowBandwidth(
4955 GPUVMEnable: v->GPUVMEnable,
4956 SourcePixelFormat: v->SourcePixelFormat[k],
4957 VRatio: v->VRatio[k],
4958 VRatioChroma: v->VRatioChroma[k],
4959 DCCEnable: v->DCCEnable[k],
4960 LineTime: v->HTotal[k] / v->PixelClock[k],
4961 MetaRowByteLuma: v->MetaRowBytesY,
4962 MetaRowByteChroma: v->MetaRowBytesC,
4963 meta_row_height_luma: v->meta_row_height[k],
4964 meta_row_height_chroma: v->meta_row_height_chroma[k],
4965 PixelPTEBytesPerRowLuma: v->DPTEBytesPerRowY,
4966 PixelPTEBytesPerRowChroma: v->DPTEBytesPerRowC,
4967 dpte_row_height_luma: v->dpte_row_height[k],
4968 dpte_row_height_chroma: v->dpte_row_height_chroma[k],
4969 meta_row_bw: &v->meta_row_bandwidth[i][j][k],
4970 dpte_row_bw: &v->dpte_row_bandwidth[i][j][k]);
4971 }
4972 /*
4973 * DCCMetaBufferSizeSupport(i, j) = True
4974 * For k = 0 To NumberOfActivePlanes - 1
4975 * If MetaRowBytes(i, j, k) > 24064 Then
4976 * DCCMetaBufferSizeSupport(i, j) = False
4977 * End If
4978 * Next k
4979 */
4980 v->DCCMetaBufferSizeSupport[i][j] = true;
4981 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4982 if (v->MetaRowBytes[i][j][k] > 24064)
4983 v->DCCMetaBufferSizeSupport[i][j] = false;
4984 }
4985 v->UrgLatency[i] = CalculateUrgentLatency(
4986 UrgentLatencyPixelDataOnly: v->UrgentLatencyPixelDataOnly,
4987 UrgentLatencyPixelMixedWithVMData: v->UrgentLatencyPixelMixedWithVMData,
4988 UrgentLatencyVMDataOnly: v->UrgentLatencyVMDataOnly,
4989 DoUrgentLatencyAdjustment: v->DoUrgentLatencyAdjustment,
4990 UrgentLatencyAdjustmentFabricClockComponent: v->UrgentLatencyAdjustmentFabricClockComponent,
4991 UrgentLatencyAdjustmentFabricClockReference: v->UrgentLatencyAdjustmentFabricClockReference,
4992 FabricClockSingle: v->FabricClockPerState[i]);
4993
4994 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4995 CalculateUrgentBurstFactor(
4996 swath_width_luma_ub: v->swath_width_luma_ub_this_state[k],
4997 swath_width_chroma_ub: v->swath_width_chroma_ub_this_state[k],
4998 SwathHeightY: v->SwathHeightYThisState[k],
4999 SwathHeightC: v->SwathHeightCThisState[k],
5000 LineTime: v->HTotal[k] / v->PixelClock[k],
5001 UrgentLatency: v->UrgLatency[i],
5002 CursorBufferSize: v->CursorBufferSize,
5003 CursorWidth: v->CursorWidth[k][0],
5004 CursorBPP: v->CursorBPP[k][0],
5005 VRatio: v->VRatio[k],
5006 VRatioC: v->VRatioChroma[k],
5007 BytePerPixelInDETY: v->BytePerPixelInDETY[k],
5008 BytePerPixelInDETC: v->BytePerPixelInDETC[k],
5009 DETBufferSizeY: v->DETBufferSizeYThisState[k],
5010 DETBufferSizeC: v->DETBufferSizeCThisState[k],
5011 UrgentBurstFactorCursor: &v->UrgentBurstFactorCursor[k],
5012 UrgentBurstFactorLuma: &v->UrgentBurstFactorLuma[k],
5013 UrgentBurstFactorChroma: &v->UrgentBurstFactorChroma[k],
5014 NotEnoughUrgentLatencyHiding: &NotUrgentLatencyHiding[k]);
5015 }
5016
5017 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
5018 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5019 if (NotUrgentLatencyHiding[k]) {
5020 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
5021 }
5022 }
5023
5024 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5025 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
5026 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
5027 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
5028 }
5029
5030 v->TotalVActivePixelBandwidth[i][j] = 0;
5031 v->TotalVActiveCursorBandwidth[i][j] = 0;
5032 v->TotalMetaRowBandwidth[i][j] = 0;
5033 v->TotalDPTERowBandwidth[i][j] = 0;
5034 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5035 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
5036 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
5037 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
5038 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
5039 }
5040 }
5041 }
5042
5043 //Calculate Return BW
5044 for (i = 0; i < v->soc.num_states; ++i) {
5045 for (j = 0; j <= 1; ++j) {
5046 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5047 if (v->BlendingAndTiming[k] == k) {
5048 if (v->WritebackEnable[k] == true) {
5049 v->WritebackDelayTime[k] = v->WritebackLatency
5050 + CalculateWriteBackDelay(
5051 WritebackPixelFormat: v->WritebackPixelFormat[k],
5052 WritebackHRatio: v->WritebackHRatio[k],
5053 WritebackVRatio: v->WritebackVRatio[k],
5054 WritebackVTaps: v->WritebackVTaps[k],
5055 WritebackDestinationWidth: v->WritebackDestinationWidth[k],
5056 WritebackDestinationHeight: v->WritebackDestinationHeight[k],
5057 WritebackSourceHeight: v->WritebackSourceHeight[k],
5058 HTotal: v->HTotal[k]) / v->RequiredDISPCLK[i][j];
5059 } else {
5060 v->WritebackDelayTime[k] = 0.0;
5061 }
5062 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5063 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
5064 v->WritebackDelayTime[k] = dml_max(
5065 a: v->WritebackDelayTime[k],
5066 b: v->WritebackLatency
5067 + CalculateWriteBackDelay(
5068 WritebackPixelFormat: v->WritebackPixelFormat[m],
5069 WritebackHRatio: v->WritebackHRatio[m],
5070 WritebackVRatio: v->WritebackVRatio[m],
5071 WritebackVTaps: v->WritebackVTaps[m],
5072 WritebackDestinationWidth: v->WritebackDestinationWidth[m],
5073 WritebackDestinationHeight: v->WritebackDestinationHeight[m],
5074 WritebackSourceHeight: v->WritebackSourceHeight[m],
5075 HTotal: v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
5076 }
5077 }
5078 }
5079 }
5080 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5081 for (m = 0; m < v->NumberOfActivePlanes; m++) {
5082 if (v->BlendingAndTiming[k] == m) {
5083 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
5084 }
5085 }
5086 }
5087 v->MaxMaxVStartup[i][j] = 0;
5088 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5089 v->MaximumVStartup[i][j][k] =
5090 CalculateMaxVStartup(
5091 VTotal: v->VTotal[k],
5092 VActive: v->VActive[k],
5093 VBlankNom: v->VBlankNom[k],
5094 HTotal: v->HTotal[k],
5095 PixelClock: v->PixelClock[k],
5096 ProgressiveTointerlaceUnitinOPP: v->ProgressiveToInterlaceUnitInOPP,
5097 Interlace: v->Interlace[k],
5098 VBlankNomDefaultUS: v->ip.VBlankNomDefaultUS,
5099 WritebackDelayTime: v->WritebackDelayTime[k]);
5100 v->MaxMaxVStartup[i][j] = dml_max(a: v->MaxMaxVStartup[i][j], b: v->MaximumVStartup[i][j][k]);
5101 }
5102 }
5103 }
5104
5105 ReorderingBytes = v->NumberOfChannels
5106 * dml_max3(
5107 a: v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5108 b: v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5109 c: v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5110
5111 for (i = 0; i < v->soc.num_states; ++i) {
5112 for (j = 0; j <= 1; ++j) {
5113 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5114 }
5115 }
5116
5117 if (v->UseMinimumRequiredDCFCLK == true)
5118 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5119
5120 for (i = 0; i < v->soc.num_states; ++i) {
5121 for (j = 0; j <= 1; ++j) {
5122 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5123 a: v->ReturnBusWidth * v->DCFCLKState[i][j],
5124 b: v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5125 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5126 double PixelDataOnlyReturnBWPerState = dml_min(
5127 a: IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5128 b: IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5129 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5130 a: IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5131 b: IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5132
5133 if (v->HostVMEnable != true) {
5134 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5135 } else {
5136 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5137 }
5138 }
5139 }
5140
5141 //Re-ordering Buffer Support Check
5142 for (i = 0; i < v->soc.num_states; ++i) {
5143 for (j = 0; j <= 1; ++j) {
5144 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5145 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5146 v->ROBSupport[i][j] = true;
5147 } else {
5148 v->ROBSupport[i][j] = false;
5149 }
5150 }
5151 }
5152
5153 //Vertical Active BW support check
5154
5155 MaxTotalVActiveRDBandwidth = 0;
5156 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5157 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5158 }
5159
5160 for (i = 0; i < v->soc.num_states; ++i) {
5161 for (j = 0; j <= 1; ++j) {
5162 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5163 a: dml_min(
5164 a: v->ReturnBusWidth * v->DCFCLKState[i][j],
5165 b: v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5166 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5167 b: v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5168 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5169
5170 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5171 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5172 } else {
5173 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5174 }
5175 }
5176 }
5177
5178 v->UrgentLatency = CalculateUrgentLatency(
5179 UrgentLatencyPixelDataOnly: v->UrgentLatencyPixelDataOnly,
5180 UrgentLatencyPixelMixedWithVMData: v->UrgentLatencyPixelMixedWithVMData,
5181 UrgentLatencyVMDataOnly: v->UrgentLatencyVMDataOnly,
5182 DoUrgentLatencyAdjustment: v->DoUrgentLatencyAdjustment,
5183 UrgentLatencyAdjustmentFabricClockComponent: v->UrgentLatencyAdjustmentFabricClockComponent,
5184 UrgentLatencyAdjustmentFabricClockReference: v->UrgentLatencyAdjustmentFabricClockReference,
5185 FabricClockSingle: v->FabricClock);
5186 //Prefetch Check
5187 for (i = 0; i < v->soc.num_states; ++i) {
5188 for (j = 0; j <= 1; ++j) {
5189 double VMDataOnlyReturnBWPerState;
5190 double HostVMInefficiencyFactor = 1;
5191 int NextPrefetchModeState = MinPrefetchMode;
5192 bool UnboundedRequestEnabledThisState = false;
5193 int CompressedBufferSizeInkByteThisState = 0;
5194 double dummy;
5195
5196 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5197
5198 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5199 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5200 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5201 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5202 }
5203
5204 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5205 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5206 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5207 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5208 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5209 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5210 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5211 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5212 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5213 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5214 }
5215
5216 VMDataOnlyReturnBWPerState = dml_min(
5217 a: dml_min(
5218 a: v->ReturnBusWidth * v->DCFCLKState[i][j],
5219 b: v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5220 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5221 b: v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5222 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5223 if (v->GPUVMEnable && v->HostVMEnable)
5224 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5225
5226 v->ExtraLatency = CalculateExtraLatency(
5227 RoundTripPingLatencyCycles: v->RoundTripPingLatencyCycles,
5228 ReorderingBytes,
5229 DCFCLK: v->DCFCLKState[i][j],
5230 TotalNumberOfActiveDPP: v->TotalNumberOfActiveDPP[i][j],
5231 PixelChunkSizeInKByte: v->PixelChunkSizeInKByte,
5232 TotalNumberOfDCCActiveDPP: v->TotalNumberOfDCCActiveDPP[i][j],
5233 MetaChunkSize: v->MetaChunkSize,
5234 ReturnBW: v->ReturnBWPerState[i][j],
5235 GPUVMEnable: v->GPUVMEnable,
5236 HostVMEnable: v->HostVMEnable,
5237 NumberOfActivePlanes: v->NumberOfActivePlanes,
5238 NumberOfDPP: v->NoOfDPPThisState,
5239 dpte_group_bytes: v->dpte_group_bytes,
5240 HostVMInefficiencyFactor,
5241 HostVMMinPageSize: v->HostVMMinPageSize,
5242 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels);
5243
5244 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5245 do {
5246 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5247 v->MaxVStartup = v->NextMaxVStartup;
5248
5249 v->TWait = CalculateTWait(
5250 PrefetchMode: v->PrefetchModePerState[i][j],
5251 DRAMClockChangeLatency: v->DRAMClockChangeLatency,
5252 UrgentLatency: v->UrgLatency[i],
5253 SREnterPlusExitTime: v->SREnterPlusExitTime);
5254
5255 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5256 CalculatePrefetchSchedulePerPlane(mode_lib,
5257 HostVMInefficiencyFactor,
5258 i, j, k);
5259 }
5260
5261 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5262 CalculateUrgentBurstFactor(
5263 swath_width_luma_ub: v->swath_width_luma_ub_this_state[k],
5264 swath_width_chroma_ub: v->swath_width_chroma_ub_this_state[k],
5265 SwathHeightY: v->SwathHeightYThisState[k],
5266 SwathHeightC: v->SwathHeightCThisState[k],
5267 LineTime: v->HTotal[k] / v->PixelClock[k],
5268 UrgentLatency: v->UrgLatency[i],
5269 CursorBufferSize: v->CursorBufferSize,
5270 CursorWidth: v->CursorWidth[k][0],
5271 CursorBPP: v->CursorBPP[k][0],
5272 VRatio: v->VRatioPreY[i][j][k],
5273 VRatioC: v->VRatioPreC[i][j][k],
5274 BytePerPixelInDETY: v->BytePerPixelInDETY[k],
5275 BytePerPixelInDETC: v->BytePerPixelInDETC[k],
5276 DETBufferSizeY: v->DETBufferSizeYThisState[k],
5277 DETBufferSizeC: v->DETBufferSizeCThisState[k],
5278 UrgentBurstFactorCursor: &v->UrgentBurstFactorCursorPre[k],
5279 UrgentBurstFactorLuma: &v->UrgentBurstFactorLumaPre[k],
5280 UrgentBurstFactorChroma: &v->UrgentBurstFactorChromaPre[k],
5281 NotEnoughUrgentLatencyHiding: &v->NotUrgentLatencyHidingPre[k]);
5282 }
5283
5284 v->MaximumReadBandwidthWithPrefetch = 0.0;
5285 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5286 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5287 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5288
5289 v->MaximumReadBandwidthWithPrefetch =
5290 v->MaximumReadBandwidthWithPrefetch
5291 + dml_max3(
5292 a: v->VActivePixelBandwidth[i][j][k]
5293 + v->VActiveCursorBandwidth[i][j][k]
5294 + v->NoOfDPP[i][j][k]
5295 * (v->meta_row_bandwidth[i][j][k]
5296 + v->dpte_row_bandwidth[i][j][k]),
5297 b: v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5298 c: v->NoOfDPP[i][j][k]
5299 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5300 * v->UrgentBurstFactorLumaPre[k]
5301 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5302 * v->UrgentBurstFactorChromaPre[k])
5303 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5304 }
5305
5306 v->NotEnoughUrgentLatencyHidingPre = false;
5307 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5308 if (v->NotUrgentLatencyHidingPre[k] == true) {
5309 v->NotEnoughUrgentLatencyHidingPre = true;
5310 }
5311 }
5312
5313 v->PrefetchSupported[i][j] = true;
5314 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5315 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5316 v->PrefetchSupported[i][j] = false;
5317 }
5318 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5319 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5320 || v->NoTimeForPrefetch[i][j][k] == true) {
5321 v->PrefetchSupported[i][j] = false;
5322 }
5323 }
5324
5325 v->DynamicMetadataSupported[i][j] = true;
5326 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5327 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5328 v->DynamicMetadataSupported[i][j] = false;
5329 }
5330 }
5331
5332 v->VRatioInPrefetchSupported[i][j] = true;
5333 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5334 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5335 v->VRatioInPrefetchSupported[i][j] = false;
5336 }
5337 }
5338 v->AnyLinesForVMOrRowTooLarge = false;
5339 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5340 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5341 v->AnyLinesForVMOrRowTooLarge = true;
5342 }
5343 }
5344
5345 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5346
5347 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5348 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5349 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5350 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5351 - dml_max(
5352 a: v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5353 b: v->NoOfDPP[i][j][k]
5354 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5355 * v->UrgentBurstFactorLumaPre[k]
5356 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5357 * v->UrgentBurstFactorChromaPre[k])
5358 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5359 }
5360 v->TotImmediateFlipBytes = 0.0;
5361 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5362 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5363 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5364 + v->DPTEBytesPerRow[i][j][k]);
5365 }
5366
5367 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5368 CalculateFlipSchedule(
5369 mode_lib,
5370 k,
5371 HostVMInefficiencyFactor,
5372 UrgentExtraLatency: v->ExtraLatency,
5373 UrgentLatency: v->UrgLatency[i],
5374 PDEAndMetaPTEBytesPerFrame: v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5375 MetaRowBytes: v->MetaRowBytes[i][j][k],
5376 DPTEBytesPerRow: v->DPTEBytesPerRow[i][j][k]);
5377 }
5378 v->total_dcn_read_bw_with_flip = 0.0;
5379 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5380 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5381 + dml_max3(
5382 a: v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5383 b: v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5384 + v->VActiveCursorBandwidth[i][j][k],
5385 c: v->NoOfDPP[i][j][k]
5386 * (v->final_flip_bw[k]
5387 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5388 * v->UrgentBurstFactorLumaPre[k]
5389 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5390 * v->UrgentBurstFactorChromaPre[k])
5391 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5392 }
5393 v->ImmediateFlipSupportedForState[i][j] = true;
5394 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5395 v->ImmediateFlipSupportedForState[i][j] = false;
5396 }
5397 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5398 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5399 v->ImmediateFlipSupportedForState[i][j] = false;
5400 }
5401 }
5402 } else {
5403 v->ImmediateFlipSupportedForState[i][j] = false;
5404 }
5405
5406 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5407 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5408 NextPrefetchModeState = NextPrefetchModeState + 1;
5409 } else {
5410 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5411 }
5412 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5413 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5414 && ((v->HostVMEnable == false &&
5415 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5416 || v->ImmediateFlipSupportedForState[i][j] == true))
5417 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5418
5419 CalculateUnboundedRequestAndCompressedBufferSize(
5420 DETBufferSizeInKByte: v->DETBufferSizeInKByte[0],
5421 ConfigReturnBufferSizeInKByte: v->ConfigReturnBufferSizeInKByte,
5422 UseUnboundedRequestingFinal: v->UseUnboundedRequesting,
5423 TotalActiveDPP: v->TotalNumberOfActiveDPP[i][j],
5424 NoChromaPlanes: NoChroma,
5425 MaxNumDPP: v->MaxNumDPP,
5426 CompressedBufferSegmentSizeInkByteFinal: v->CompressedBufferSegmentSizeInkByte,
5427 Output: v->Output,
5428 UnboundedRequestEnabled: &UnboundedRequestEnabledThisState,
5429 CompressedBufferSizeInkByte: &CompressedBufferSizeInkByteThisState);
5430
5431 CalculateWatermarksAndDRAMSpeedChangeSupport(
5432 mode_lib,
5433 PrefetchMode: v->PrefetchModePerState[i][j],
5434 DCFCLK: v->DCFCLKState[i][j],
5435 ReturnBW: v->ReturnBWPerState[i][j],
5436 UrgentLatency: v->UrgLatency[i],
5437 ExtraLatency: v->ExtraLatency,
5438 SOCCLK: v->SOCCLKPerState[i],
5439 DCFCLKDeepSleep: v->ProjectedDCFCLKDeepSleep[i][j],
5440 DETBufferSizeY: v->DETBufferSizeYThisState,
5441 DETBufferSizeC: v->DETBufferSizeCThisState,
5442 SwathHeightY: v->SwathHeightYThisState,
5443 SwathHeightC: v->SwathHeightCThisState,
5444 SwathWidthY: v->SwathWidthYThisState,
5445 SwathWidthC: v->SwathWidthCThisState,
5446 DPPPerPlane: v->NoOfDPPThisState,
5447 BytePerPixelDETY: v->BytePerPixelInDETY,
5448 BytePerPixelDETC: v->BytePerPixelInDETC,
5449 UnboundedRequestEnabled: UnboundedRequestEnabledThisState,
5450 CompressedBufferSizeInkByte: CompressedBufferSizeInkByteThisState,
5451 DRAMClockChangeSupport: &v->DRAMClockChangeSupport[i][j],
5452 StutterExitWatermark: &dummy,
5453 StutterEnterPlusExitWatermark: &dummy,
5454 Z8StutterExitWatermark: &dummy,
5455 Z8StutterEnterPlusExitWatermark: &dummy);
5456 }
5457 }
5458
5459 /*PTE Buffer Size Check*/
5460 for (i = 0; i < v->soc.num_states; i++) {
5461 for (j = 0; j < 2; j++) {
5462 v->PTEBufferSizeNotExceeded[i][j] = true;
5463 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5464 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5465 v->PTEBufferSizeNotExceeded[i][j] = false;
5466 }
5467 }
5468 }
5469 }
5470
5471 /*Cursor Support Check*/
5472 v->CursorSupport = true;
5473 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5474 if (v->CursorWidth[k][0] > 0.0) {
5475 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5476 v->CursorSupport = false;
5477 }
5478 }
5479 }
5480
5481 /*Valid Pitch Check*/
5482 v->PitchSupport = true;
5483 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5484 v->AlignedYPitch[k] = dml_ceil(a: dml_max(a: v->PitchY[k], b: v->SurfaceWidthY[k]), granularity: v->MacroTileWidthY[k]);
5485 if (v->DCCEnable[k] == true) {
5486 v->AlignedDCCMetaPitchY[k] = dml_ceil(a: dml_max(a: v->DCCMetaPitchY[k], b: v->SurfaceWidthY[k]), granularity: 64.0 * v->Read256BlockWidthY[k]);
5487 } else {
5488 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5489 }
5490 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5491 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5492 && v->SourcePixelFormat[k] != dm_mono_8) {
5493 v->AlignedCPitch[k] = dml_ceil(a: dml_max(a: v->PitchC[k], b: v->SurfaceWidthC[k]), granularity: v->MacroTileWidthC[k]);
5494 if (v->DCCEnable[k] == true) {
5495 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5496 a: dml_max(a: v->DCCMetaPitchC[k], b: v->SurfaceWidthC[k]),
5497 granularity: 64.0 * v->Read256BlockWidthC[k]);
5498 } else {
5499 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5500 }
5501 } else {
5502 v->AlignedCPitch[k] = v->PitchC[k];
5503 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5504 }
5505 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5506 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5507 v->PitchSupport = false;
5508 }
5509 }
5510
5511 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5512 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5513 ViewportExceedsSurface = true;
5514 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5515 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5516 && v->SourcePixelFormat[k] != dm_rgbe) {
5517 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5518 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5519 ViewportExceedsSurface = true;
5520 }
5521 }
5522 }
5523 }
5524
5525 /*Mode Support, Voltage State and SOC Configuration*/
5526 for (i = v->soc.num_states - 1; i >= 0; i--) {
5527 for (j = 0; j < 2; j++) {
5528 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5529 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5530 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5531 && v->DTBCLKRequiredMoreThanSupported[i] == false
5532 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5533 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5534 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5535 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5536 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5537 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5538 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5539 && ((v->HostVMEnable == false
5540 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5541 || v->ImmediateFlipSupportedForState[i][j] == true)
5542 && FMTBufferExceeded == false) {
5543 v->ModeSupport[i][j] = true;
5544 } else {
5545 v->ModeSupport[i][j] = false;
5546 }
5547 }
5548 }
5549 for (i = v->soc.num_states; i >= 0; i--) {
5550 for (j = 0; j < 2; j++) {
5551 enum dm_validation_status status = DML_VALIDATION_OK;
5552
5553 if (!v->ScaleRatioAndTapsSupport) {
5554 status = DML_FAIL_SCALE_RATIO_TAP;
5555 } else if (!v->SourceFormatPixelAndScanSupport) {
5556 status = DML_FAIL_SOURCE_PIXEL_FORMAT;
5557 } else if (!v->ViewportSizeSupport[i][j]) {
5558 status = DML_FAIL_VIEWPORT_SIZE;
5559 } else if (P2IWith420) {
5560 status = DML_FAIL_P2I_WITH_420;
5561 } else if (DSCOnlyIfNecessaryWithBPP) {
5562 status = DML_FAIL_DSC_ONLY_IF_NECESSARY_WITH_BPP;
5563 } else if (DSC422NativeNotSupported) {
5564 status = DML_FAIL_NOT_DSC422_NATIVE;
5565 } else if (!v->ODMCombine4To1SupportCheckOK[i]) {
5566 status = DML_FAIL_ODM_COMBINE4TO1;
5567 } else if (v->NotEnoughDSCUnits[i]) {
5568 status = DML_FAIL_NOT_ENOUGH_DSC;
5569 } else if (!v->ROBSupport[i][j]) {
5570 status = DML_FAIL_REORDERING_BUFFER;
5571 } else if (!v->DISPCLK_DPPCLK_Support[i][j]) {
5572 status = DML_FAIL_DISPCLK_DPPCLK;
5573 } else if (!v->TotalAvailablePipesSupport[i][j]) {
5574 status = DML_FAIL_TOTAL_AVAILABLE_PIPES;
5575 } else if (!EnoughWritebackUnits) {
5576 status = DML_FAIL_ENOUGH_WRITEBACK_UNITS;
5577 } else if (!v->WritebackLatencySupport) {
5578 status = DML_FAIL_WRITEBACK_LATENCY;
5579 } else if (!v->WritebackScaleRatioAndTapsSupport) {
5580 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP;
5581 } else if (!v->CursorSupport) {
5582 status = DML_FAIL_CURSOR_SUPPORT;
5583 } else if (!v->PitchSupport) {
5584 status = DML_FAIL_PITCH_SUPPORT;
5585 } else if (ViewportExceedsSurface) {
5586 status = DML_FAIL_VIEWPORT_EXCEEDS_SURFACE;
5587 } else if (!v->PrefetchSupported[i][j]) {
5588 status = DML_FAIL_PREFETCH_SUPPORT;
5589 } else if (!v->DynamicMetadataSupported[i][j]) {
5590 status = DML_FAIL_DYNAMIC_METADATA;
5591 } else if (!v->TotalVerticalActiveBandwidthSupport[i][j]) {
5592 status = DML_FAIL_TOTAL_V_ACTIVE_BW;
5593 } else if (!v->VRatioInPrefetchSupported[i][j]) {
5594 status = DML_FAIL_V_RATIO_PREFETCH;
5595 } else if (!v->PTEBufferSizeNotExceeded[i][j]) {
5596 status = DML_FAIL_PTE_BUFFER_SIZE;
5597 } else if (v->NonsupportedDSCInputBPC) {
5598 status = DML_FAIL_DSC_INPUT_BPC;
5599 } else if ((v->HostVMEnable
5600 && !v->ImmediateFlipSupportedForState[i][j])) {
5601 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP;
5602 } else if (FMTBufferExceeded) {
5603 status = DML_FAIL_FMT_BUFFER_EXCEEDED;
5604 }
5605 mode_lib->vba.ValidationStatus[i] = status;
5606 }
5607 }
5608
5609 {
5610 unsigned int MaximumMPCCombine = 0;
5611
5612 for (i = v->soc.num_states; i >= 0; i--) {
5613 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5614 v->VoltageLevel = i;
5615 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5616 if (v->ModeSupport[i][0] == true) {
5617 MaximumMPCCombine = 0;
5618 } else {
5619 MaximumMPCCombine = 1;
5620 }
5621 }
5622 }
5623 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5624 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5625 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5626 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5627 }
5628 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5629 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5630 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5631 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5632 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5633 v->maxMpcComb = MaximumMPCCombine;
5634 }
5635}
5636
5637static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5638 struct display_mode_lib *mode_lib,
5639 unsigned int PrefetchMode,
5640 double DCFCLK,
5641 double ReturnBW,
5642 double UrgentLatency,
5643 double ExtraLatency,
5644 double SOCCLK,
5645 double DCFCLKDeepSleep,
5646 unsigned int DETBufferSizeY[],
5647 unsigned int DETBufferSizeC[],
5648 unsigned int SwathHeightY[],
5649 unsigned int SwathHeightC[],
5650 double SwathWidthY[],
5651 double SwathWidthC[],
5652 unsigned int DPPPerPlane[],
5653 double BytePerPixelDETY[],
5654 double BytePerPixelDETC[],
5655 bool UnboundedRequestEnabled,
5656 unsigned int CompressedBufferSizeInkByte,
5657 enum clock_change_support *DRAMClockChangeSupport,
5658 double *StutterExitWatermark,
5659 double *StutterEnterPlusExitWatermark,
5660 double *Z8StutterExitWatermark,
5661 double *Z8StutterEnterPlusExitWatermark)
5662{
5663 struct vba_vars_st *v = &mode_lib->vba;
5664 double EffectiveLBLatencyHidingY;
5665 double EffectiveLBLatencyHidingC;
5666 double LinesInDETY[DC__NUM_DPP__MAX];
5667 double LinesInDETC;
5668 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5669 unsigned int LinesInDETCRoundedDownToSwath;
5670 double FullDETBufferingTimeY;
5671 double FullDETBufferingTimeC;
5672 double ActiveDRAMClockChangeLatencyMarginY;
5673 double ActiveDRAMClockChangeLatencyMarginC;
5674 double WritebackDRAMClockChangeLatencyMargin;
5675 double PlaneWithMinActiveDRAMClockChangeMargin;
5676 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5677 double WritebackDRAMClockChangeLatencyHiding;
5678 double TotalPixelBW = 0.0;
5679 int k, j;
5680
5681 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5682
5683#ifdef __DML_VBA_DEBUG__
5684 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5685 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5686 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5687#endif
5688
5689 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5690
5691#ifdef __DML_VBA_DEBUG__
5692 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5693 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5694#endif
5695
5696 v->TotalActiveWriteback = 0;
5697 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5698 if (v->WritebackEnable[k] == true) {
5699 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5700 }
5701 }
5702
5703 if (v->TotalActiveWriteback <= 1) {
5704 v->WritebackUrgentWatermark = v->WritebackLatency;
5705 } else {
5706 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5707 }
5708
5709 if (v->TotalActiveWriteback <= 1) {
5710 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5711 } else {
5712 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5713 }
5714
5715 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5716 TotalPixelBW = TotalPixelBW
5717 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5718 / (v->HTotal[k] / v->PixelClock[k]);
5719 }
5720
5721 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5722 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5723
5724 v->LBLatencyHidingSourceLinesY = dml_min(
5725 a: (double) v->MaxLineBufferLines,
5726 b: dml_floor(a: v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(a: v->HRatio[k], b: 1.0)), granularity: 1)) - (v->vtaps[k] - 1);
5727
5728 v->LBLatencyHidingSourceLinesC = dml_min(
5729 a: (double) v->MaxLineBufferLines,
5730 b: dml_floor(a: v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(a: v->HRatioChroma[k], b: 1.0)), granularity: 1)) - (v->VTAPsChroma[k] - 1);
5731
5732 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5733
5734 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5735
5736 if (UnboundedRequestEnabled) {
5737 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5738 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5739 }
5740
5741 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5742 LinesInDETYRoundedDownToSwath[k] = dml_floor(a: LinesInDETY[k], granularity: SwathHeightY[k]);
5743 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5744 if (BytePerPixelDETC[k] > 0) {
5745 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5746 LinesInDETCRoundedDownToSwath = dml_floor(a: LinesInDETC, granularity: SwathHeightC[k]);
5747 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5748 } else {
5749 LinesInDETC = 0;
5750 FullDETBufferingTimeC = 999999;
5751 }
5752
5753 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5754 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5755
5756 if (v->NumberOfActivePlanes > 1) {
5757 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5758 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5759 }
5760
5761 if (BytePerPixelDETC[k] > 0) {
5762 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5763 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5764
5765 if (v->NumberOfActivePlanes > 1) {
5766 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5767 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5768 }
5769 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(a: ActiveDRAMClockChangeLatencyMarginY, b: ActiveDRAMClockChangeLatencyMarginC);
5770 } else {
5771 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5772 }
5773
5774 if (v->WritebackEnable[k] == true) {
5775 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5776 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5777 if (v->WritebackPixelFormat[k] == dm_444_64) {
5778 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5779 }
5780 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5781 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(a: v->ActiveDRAMClockChangeLatencyMargin[k], b: WritebackDRAMClockChangeLatencyMargin);
5782 }
5783 }
5784
5785 v->MinActiveDRAMClockChangeMargin = 999999;
5786 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5787 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5788 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5789 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5790 if (v->BlendingAndTiming[k] == k) {
5791 PlaneWithMinActiveDRAMClockChangeMargin = k;
5792 } else {
5793 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5794 if (v->BlendingAndTiming[k] == j) {
5795 PlaneWithMinActiveDRAMClockChangeMargin = j;
5796 }
5797 }
5798 }
5799 }
5800 }
5801
5802 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5803
5804 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5805 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5806 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5807 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5808 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5809 }
5810 }
5811
5812 v->TotalNumberOfActiveOTG = 0;
5813
5814 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5815 if (v->BlendingAndTiming[k] == k) {
5816 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5817 }
5818 }
5819
5820 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5821 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5822 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5823 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5824 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5825 } else {
5826 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5827 }
5828
5829 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5830 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5831 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5832 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5833
5834#ifdef __DML_VBA_DEBUG__
5835 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5836 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5837 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5838 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5839#endif
5840}
5841
5842static void CalculateDCFCLKDeepSleep(
5843 struct display_mode_lib *mode_lib,
5844 unsigned int NumberOfActivePlanes,
5845 int BytePerPixelY[],
5846 int BytePerPixelC[],
5847 double VRatio[],
5848 double VRatioChroma[],
5849 double SwathWidthY[],
5850 double SwathWidthC[],
5851 unsigned int DPPPerPlane[],
5852 double HRatio[],
5853 double HRatioChroma[],
5854 double PixelClock[],
5855 double PSCL_THROUGHPUT[],
5856 double PSCL_THROUGHPUT_CHROMA[],
5857 double DPPCLK[],
5858 double ReadBandwidthLuma[],
5859 double ReadBandwidthChroma[],
5860 int ReturnBusWidth,
5861 double *DCFCLKDeepSleep)
5862{
5863 struct vba_vars_st *v = &mode_lib->vba;
5864 double DisplayPipeLineDeliveryTimeLuma;
5865 double DisplayPipeLineDeliveryTimeChroma;
5866 double ReadBandwidth = 0.0;
5867 int k;
5868
5869 for (k = 0; k < NumberOfActivePlanes; ++k) {
5870
5871 if (VRatio[k] <= 1) {
5872 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5873 } else {
5874 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5875 }
5876 if (BytePerPixelC[k] == 0) {
5877 DisplayPipeLineDeliveryTimeChroma = 0;
5878 } else {
5879 if (VRatioChroma[k] <= 1) {
5880 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5881 } else {
5882 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5883 }
5884 }
5885
5886 if (BytePerPixelC[k] > 0) {
5887 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5888 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5889 } else {
5890 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5891 }
5892 v->DCFCLKDeepSleepPerPlane[k] = dml_max(a: v->DCFCLKDeepSleepPerPlane[k], b: PixelClock[k] / 16);
5893
5894 }
5895
5896 for (k = 0; k < NumberOfActivePlanes; ++k) {
5897 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5898 }
5899
5900 *DCFCLKDeepSleep = dml_max(a: 8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5901
5902 for (k = 0; k < NumberOfActivePlanes; ++k) {
5903 *DCFCLKDeepSleep = dml_max(a: *DCFCLKDeepSleep, b: v->DCFCLKDeepSleepPerPlane[k]);
5904 }
5905}
5906
/*
 * Compute the urgent burst factors for the cursor, luma and chroma of one
 * plane.
 *
 * For each of the three, the time covered by the respective buffer is
 * compared with UrgentLatency:
 *   - buffer time <= UrgentLatency: the factor is set to 0 and
 *     *NotEnoughUrgentLatencyHiding is set;
 *   - otherwise: factor = buffer time / (buffer time - UrgentLatency);
 *   - when the applicable vertical ratio is not > 0 the factor is 1.
 *
 * The cursor and luma paths use VRatio; the chroma path uses VRatioC.
 *
 * *UrgentBurstFactorCursor is left untouched when CursorWidth is 0, and
 * *UrgentBurstFactorChroma is left untouched when BytePerPixelInDETC is not
 * > 0. *NotEnoughUrgentLatencyHiding is always initialised to false first.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = false;

	if (CursorWidth > 0) {
		/*
		 * Power-of-two line count derived from the cursor buffer size
		 * (scaled by 1024) divided by the bytes in one cursor line.
		 */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole luma swaths count towards the buffered time. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = true;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * Chroma lines are consumed at the chroma vertical ratio, so the
		 * buffered time must be derived from VRatioC rather than from
		 * the luma VRatio.
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = true;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5980
5981static void CalculatePixelDeliveryTimes(
5982 unsigned int NumberOfActivePlanes,
5983 double VRatio[],
5984 double VRatioChroma[],
5985 double VRatioPrefetchY[],
5986 double VRatioPrefetchC[],
5987 unsigned int swath_width_luma_ub[],
5988 unsigned int swath_width_chroma_ub[],
5989 unsigned int DPPPerPlane[],
5990 double HRatio[],
5991 double HRatioChroma[],
5992 double PixelClock[],
5993 double PSCL_THROUGHPUT[],
5994 double PSCL_THROUGHPUT_CHROMA[],
5995 double DPPCLK[],
5996 int BytePerPixelC[],
5997 enum scan_direction_class SourceScan[],
5998 unsigned int NumberOfCursors[],
5999 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
6000 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
6001 unsigned int BlockWidth256BytesY[],
6002 unsigned int BlockHeight256BytesY[],
6003 unsigned int BlockWidth256BytesC[],
6004 unsigned int BlockHeight256BytesC[],
6005 double DisplayPipeLineDeliveryTimeLuma[],
6006 double DisplayPipeLineDeliveryTimeChroma[],
6007 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
6008 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
6009 double DisplayPipeRequestDeliveryTimeLuma[],
6010 double DisplayPipeRequestDeliveryTimeChroma[],
6011 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
6012 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
6013 double CursorRequestDeliveryTime[],
6014 double CursorRequestDeliveryTimePrefetch[])
6015{
6016 double req_per_swath_ub;
6017 int k;
6018
6019 for (k = 0; k < NumberOfActivePlanes; ++k) {
6020 if (VRatio[k] <= 1) {
6021 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6022 } else {
6023 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6024 }
6025
6026 if (BytePerPixelC[k] == 0) {
6027 DisplayPipeLineDeliveryTimeChroma[k] = 0;
6028 } else {
6029 if (VRatioChroma[k] <= 1) {
6030 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6031 } else {
6032 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6033 }
6034 }
6035
6036 if (VRatioPrefetchY[k] <= 1) {
6037 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
6038 } else {
6039 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
6040 }
6041
6042 if (BytePerPixelC[k] == 0) {
6043 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
6044 } else {
6045 if (VRatioPrefetchC[k] <= 1) {
6046 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
6047 } else {
6048 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
6049 }
6050 }
6051 }
6052
6053 for (k = 0; k < NumberOfActivePlanes; ++k) {
6054 if (SourceScan[k] != dm_vert) {
6055 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
6056 } else {
6057 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
6058 }
6059 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
6060 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
6061 if (BytePerPixelC[k] == 0) {
6062 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
6063 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
6064 } else {
6065 if (SourceScan[k] != dm_vert) {
6066 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
6067 } else {
6068 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
6069 }
6070 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
6071 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
6072 }
6073#ifdef __DML_VBA_DEBUG__
6074 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
6075 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
6076 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
6077 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
6078 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
6079 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
6080 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
6081 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
6082 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
6083 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
6084 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
6085 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
6086#endif
6087 }
6088
6089 for (k = 0; k < NumberOfActivePlanes; ++k) {
6090 int cursor_req_per_width;
6091
6092 cursor_req_per_width = dml_ceil(a: CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, granularity: 1);
6093 if (NumberOfCursors[k] > 0) {
6094 if (VRatio[k] <= 1) {
6095 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6096 } else {
6097 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6098 }
6099 if (VRatioPrefetchY[k] <= 1) {
6100 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6101 } else {
6102 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6103 }
6104 } else {
6105 CursorRequestDeliveryTime[k] = 0;
6106 CursorRequestDeliveryTimePrefetch[k] = 0;
6107 }
6108#ifdef __DML_VBA_DEBUG__
6109 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6110 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6111 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6112#endif
6113 }
6114}
6115
6116static void CalculateMetaAndPTETimes(
6117 int NumberOfActivePlanes,
6118 bool GPUVMEnable,
6119 int MetaChunkSize,
6120 int MinMetaChunkSizeBytes,
6121 int HTotal[],
6122 double VRatio[],
6123 double VRatioChroma[],
6124 double DestinationLinesToRequestRowInVBlank[],
6125 double DestinationLinesToRequestRowInImmediateFlip[],
6126 bool DCCEnable[],
6127 double PixelClock[],
6128 int BytePerPixelY[],
6129 int BytePerPixelC[],
6130 enum scan_direction_class SourceScan[],
6131 int dpte_row_height[],
6132 int dpte_row_height_chroma[],
6133 int meta_row_width[],
6134 int meta_row_width_chroma[],
6135 int meta_row_height[],
6136 int meta_row_height_chroma[],
6137 int meta_req_width[],
6138 int meta_req_width_chroma[],
6139 int meta_req_height[],
6140 int meta_req_height_chroma[],
6141 int dpte_group_bytes[],
6142 int PTERequestSizeY[],
6143 int PTERequestSizeC[],
6144 int PixelPTEReqWidthY[],
6145 int PixelPTEReqHeightY[],
6146 int PixelPTEReqWidthC[],
6147 int PixelPTEReqHeightC[],
6148 int dpte_row_width_luma_ub[],
6149 int dpte_row_width_chroma_ub[],
6150 double DST_Y_PER_PTE_ROW_NOM_L[],
6151 double DST_Y_PER_PTE_ROW_NOM_C[],
6152 double DST_Y_PER_META_ROW_NOM_L[],
6153 double DST_Y_PER_META_ROW_NOM_C[],
6154 double TimePerMetaChunkNominal[],
6155 double TimePerChromaMetaChunkNominal[],
6156 double TimePerMetaChunkVBlank[],
6157 double TimePerChromaMetaChunkVBlank[],
6158 double TimePerMetaChunkFlip[],
6159 double TimePerChromaMetaChunkFlip[],
6160 double time_per_pte_group_nom_luma[],
6161 double time_per_pte_group_vblank_luma[],
6162 double time_per_pte_group_flip_luma[],
6163 double time_per_pte_group_nom_chroma[],
6164 double time_per_pte_group_vblank_chroma[],
6165 double time_per_pte_group_flip_chroma[])
6166{
6167 unsigned int meta_chunk_width;
6168 unsigned int min_meta_chunk_width;
6169 unsigned int meta_chunk_per_row_int;
6170 unsigned int meta_row_remainder;
6171 unsigned int meta_chunk_threshold;
6172 unsigned int meta_chunks_per_row_ub;
6173 unsigned int meta_chunk_width_chroma;
6174 unsigned int min_meta_chunk_width_chroma;
6175 unsigned int meta_chunk_per_row_int_chroma;
6176 unsigned int meta_row_remainder_chroma;
6177 unsigned int meta_chunk_threshold_chroma;
6178 unsigned int meta_chunks_per_row_ub_chroma;
6179 unsigned int dpte_group_width_luma;
6180 unsigned int dpte_groups_per_row_luma_ub;
6181 unsigned int dpte_group_width_chroma;
6182 unsigned int dpte_groups_per_row_chroma_ub;
6183 int k;
6184
6185 for (k = 0; k < NumberOfActivePlanes; ++k) {
6186 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6187 if (BytePerPixelC[k] == 0) {
6188 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6189 } else {
6190 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6191 }
6192 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6193 if (BytePerPixelC[k] == 0) {
6194 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6195 } else {
6196 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6197 }
6198 }
6199
6200 for (k = 0; k < NumberOfActivePlanes; ++k) {
6201 if (DCCEnable[k] == true) {
6202 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6203 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6204 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6205 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6206 if (SourceScan[k] != dm_vert) {
6207 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6208 } else {
6209 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6210 }
6211 if (meta_row_remainder <= meta_chunk_threshold) {
6212 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6213 } else {
6214 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6215 }
6216 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6217 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6218 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6219 if (BytePerPixelC[k] == 0) {
6220 TimePerChromaMetaChunkNominal[k] = 0;
6221 TimePerChromaMetaChunkVBlank[k] = 0;
6222 TimePerChromaMetaChunkFlip[k] = 0;
6223 } else {
6224 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6225 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6226 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6227 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6228 if (SourceScan[k] != dm_vert) {
6229 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6230 } else {
6231 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6232 }
6233 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6234 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6235 } else {
6236 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6237 }
6238 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6239 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6240 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6241 }
6242 } else {
6243 TimePerMetaChunkNominal[k] = 0;
6244 TimePerMetaChunkVBlank[k] = 0;
6245 TimePerMetaChunkFlip[k] = 0;
6246 TimePerChromaMetaChunkNominal[k] = 0;
6247 TimePerChromaMetaChunkVBlank[k] = 0;
6248 TimePerChromaMetaChunkFlip[k] = 0;
6249 }
6250 }
6251
6252 for (k = 0; k < NumberOfActivePlanes; ++k) {
6253 if (GPUVMEnable == true) {
6254 if (SourceScan[k] != dm_vert) {
6255 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6256 } else {
6257 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6258 }
6259 dpte_groups_per_row_luma_ub = dml_ceil(a: 1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, granularity: 1);
6260 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6261 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6262 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6263 if (BytePerPixelC[k] == 0) {
6264 time_per_pte_group_nom_chroma[k] = 0;
6265 time_per_pte_group_vblank_chroma[k] = 0;
6266 time_per_pte_group_flip_chroma[k] = 0;
6267 } else {
6268 if (SourceScan[k] != dm_vert) {
6269 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6270 } else {
6271 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6272 }
6273 dpte_groups_per_row_chroma_ub = dml_ceil(a: 1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, granularity: 1);
6274 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6275 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6276 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6277 }
6278 } else {
6279 time_per_pte_group_nom_luma[k] = 0;
6280 time_per_pte_group_vblank_luma[k] = 0;
6281 time_per_pte_group_flip_luma[k] = 0;
6282 time_per_pte_group_nom_chroma[k] = 0;
6283 time_per_pte_group_vblank_chroma[k] = 0;
6284 time_per_pte_group_flip_chroma[k] = 0;
6285 }
6286 }
6287}
6288
/*
 * CalculateVMGroupAndRequestTimes - per-plane time per VM group and per VM
 * request, for the VBlank and immediate-flip cases.
 *
 * For every plane k in [0, NumberOfActivePlanes) the four outputs are 0
 * unless GPUVMEnable is set and either DCCEnable[k] is set or
 * GPUVMMaxPageTableLevels > 1. Otherwise each output is
 *     DestinationLinesToRequestVM* * HTotal / PixelClock / count
 * where count is the number of VM groups (for TimePerVMGroup*) or of 64-byte
 * requests (for TimePerVMRequest*) accumulated from:
 *  - the dpde0 byte counts, when DCC is off or GPUVMMaxPageTableLevels != 1;
 *  - the meta PTE byte counts, when DCC is on;
 *  - for the group count only, an extra 1 (2 with chroma) when both of the
 *    above contribute.
 * Chroma byte counts are included only when BytePerPixelC[k] > 0.
 * All four outputs are halved when GPUVMMaxPageTableLevels > 2.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		bool has_chroma;
		bool uses_pde;
		bool uses_meta;
		double group_bytes;
		double group_sum;
		unsigned int req_sum;
		int groups_per_stage;
		int reqs_per_stage;

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		has_chroma = BytePerPixelC[k] > 0;
		uses_meta = DCCEnable[k];
		uses_pde = !DCCEnable[k] || GPUVMMaxPageTableLevels != 1;
		group_bytes = (double) (vm_group_bytes[k]);

		group_sum = 0;
		req_sum = 0;

		/* Fixed extra groups when both PDE and meta PTE data contribute. */
		if (uses_pde && uses_meta) {
			group_sum = has_chroma ? 2 : 1;
		}

		if (uses_pde) {
			group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1);
			req_sum += dpde0_bytes_per_frame_ub_l[k] / 64;
			if (has_chroma) {
				group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / group_bytes, 1);
				req_sum += dpde0_bytes_per_frame_ub_c[k] / 64;
			}
		}

		if (uses_meta) {
			group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1);
			req_sum += meta_pte_bytes_per_frame_ub_l[k] / 64;
			if (has_chroma) {
				group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / group_bytes, 1);
				req_sum += meta_pte_bytes_per_frame_ub_c[k] / 64;
			}
		}

		/* The divisors below are signed integers, as the totals always were. */
		groups_per_stage = group_sum;
		reqs_per_stage = req_sum;

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / groups_per_stage;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / groups_per_stage;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / reqs_per_stage;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / reqs_per_stage;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}
6388
6389static void CalculateStutterEfficiency(
6390 struct display_mode_lib *mode_lib,
6391 int CompressedBufferSizeInkByte,
6392 bool UnboundedRequestEnabled,
6393 int ConfigReturnBufferSizeInKByte,
6394 int MetaFIFOSizeInKEntries,
6395 int ZeroSizeBufferEntries,
6396 int NumberOfActivePlanes,
6397 int ROBBufferSizeInKByte,
6398 double TotalDataReadBandwidth,
6399 double DCFCLK,
6400 double ReturnBW,
6401 double COMPBUF_RESERVED_SPACE_64B,
6402 double COMPBUF_RESERVED_SPACE_ZS,
6403 double SRExitTime,
6404 double SRExitZ8Time,
6405 bool SynchronizedVBlank,
6406 double Z8StutterEnterPlusExitWatermark,
6407 double StutterEnterPlusExitWatermark,
6408 bool ProgressiveToInterlaceUnitInOPP,
6409 bool Interlace[],
6410 double MinTTUVBlank[],
6411 int DPPPerPlane[],
6412 unsigned int DETBufferSizeY[],
6413 int BytePerPixelY[],
6414 double BytePerPixelDETY[],
6415 double SwathWidthY[],
6416 int SwathHeightY[],
6417 int SwathHeightC[],
6418 double NetDCCRateLuma[],
6419 double NetDCCRateChroma[],
6420 double DCCFractionOfZeroSizeRequestsLuma[],
6421 double DCCFractionOfZeroSizeRequestsChroma[],
6422 int HTotal[],
6423 int VTotal[],
6424 double PixelClock[],
6425 double VRatio[],
6426 enum scan_direction_class SourceScan[],
6427 int BlockHeight256BytesY[],
6428 int BlockWidth256BytesY[],
6429 int BlockHeight256BytesC[],
6430 int BlockWidth256BytesC[],
6431 int DCCYMaxUncompressedBlock[],
6432 int DCCCMaxUncompressedBlock[],
6433 int VActive[],
6434 bool DCCEnable[],
6435 bool WritebackEnable[],
6436 double ReadBandwidthPlaneLuma[],
6437 double ReadBandwidthPlaneChroma[],
6438 double meta_row_bw[],
6439 double dpte_row_bw[],
6440 double *StutterEfficiencyNotIncludingVBlank,
6441 double *StutterEfficiency,
6442 int *NumberOfStutterBurstsPerFrame,
6443 double *Z8StutterEfficiencyNotIncludingVBlank,
6444 double *Z8StutterEfficiency,
6445 int *Z8NumberOfStutterBurstsPerFrame,
6446 double *StutterPeriod)
6447{
6448 struct vba_vars_st *v = &mode_lib->vba;
6449
6450 double DETBufferingTimeY;
6451 double SwathWidthYCriticalPlane = 0;
6452 double VActiveTimeCriticalPlane = 0;
6453 double FrameTimeCriticalPlane = 0;
6454 int BytePerPixelYCriticalPlane = 0;
6455 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6456 double MinTTUVBlankCriticalPlane = 0;
6457 double TotalCompressedReadBandwidth;
6458 double TotalRowReadBandwidth;
6459 double AverageDCCCompressionRate;
6460 double EffectiveCompressedBufferSize;
6461 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6462 double StutterBurstTime;
6463 int TotalActiveWriteback;
6464 double LinesInDETY;
6465 double LinesInDETYRoundedDownToSwath;
6466 double MaximumEffectiveCompressionLuma;
6467 double MaximumEffectiveCompressionChroma;
6468 double TotalZeroSizeRequestReadBandwidth;
6469 double TotalZeroSizeCompressedReadBandwidth;
6470 double AverageDCCZeroSizeFraction;
6471 double AverageZeroSizeCompressionRate;
6472 int TotalNumberOfActiveOTG = 0;
6473 double LastStutterPeriod = 0.0;
6474 double LastZ8StutterPeriod = 0.0;
6475 int k;
6476
6477 TotalZeroSizeRequestReadBandwidth = 0;
6478 TotalZeroSizeCompressedReadBandwidth = 0;
6479 TotalRowReadBandwidth = 0;
6480 TotalCompressedReadBandwidth = 0;
6481
6482 for (k = 0; k < NumberOfActivePlanes; ++k) {
6483 if (DCCEnable[k] == true) {
6484 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6485 || DCCYMaxUncompressedBlock[k] < 256) {
6486 MaximumEffectiveCompressionLuma = 2;
6487 } else {
6488 MaximumEffectiveCompressionLuma = 4;
6489 }
6490 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(a: NetDCCRateLuma[k], b: MaximumEffectiveCompressionLuma);
6491 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6492 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6493 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6494 if (ReadBandwidthPlaneChroma[k] > 0) {
6495 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6496 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6497 MaximumEffectiveCompressionChroma = 2;
6498 } else {
6499 MaximumEffectiveCompressionChroma = 4;
6500 }
6501 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6502 + ReadBandwidthPlaneChroma[k] / dml_min(a: NetDCCRateChroma[k], b: MaximumEffectiveCompressionChroma);
6503 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6504 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6505 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6506 }
6507 } else {
6508 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6509 }
6510 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6511 }
6512
6513 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6514 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6515
6516#ifdef __DML_VBA_DEBUG__
6517 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6518 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6519 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6520 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6521 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6522 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6523 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6524 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6525#endif
6526
6527 if (AverageDCCZeroSizeFraction == 1) {
6528 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6529 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6530 } else if (AverageDCCZeroSizeFraction > 0) {
6531 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6532 EffectiveCompressedBufferSize = dml_min(
6533 a: CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6534 b: MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6535 + dml_min(a: (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6536 b: (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6537 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6538 dml_print(
6539 "DML::%s: min 2 = %f\n",
6540 __func__,
6541 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6542 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6543 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6544 } else {
6545 EffectiveCompressedBufferSize = dml_min(
6546 a: CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6547 b: MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6548 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6549 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6550 }
6551
6552#ifdef __DML_VBA_DEBUG__
6553 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6554 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6555 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6556#endif
6557
6558 *StutterPeriod = 0;
6559 for (k = 0; k < NumberOfActivePlanes; ++k) {
6560 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6561 / BytePerPixelDETY[k] / SwathWidthY[k];
6562 LinesInDETYRoundedDownToSwath = dml_floor(a: LinesInDETY, granularity: SwathHeightY[k]);
6563 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6564#ifdef __DML_VBA_DEBUG__
6565 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6566 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6567 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6568 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6569 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6570 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6571 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6572 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6573 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6574 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6575 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6576 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6577#endif
6578
6579 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6580 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6581
6582 *StutterPeriod = DETBufferingTimeY;
6583 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(a: VTotal[k] / 2.0, granularity: 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6584 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(a: VActive[k] / 2.0, granularity: 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6585 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6586 SwathWidthYCriticalPlane = SwathWidthY[k];
6587 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6588 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6589
6590#ifdef __DML_VBA_DEBUG__
6591 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6592 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6593 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6594 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6595 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6596 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6597 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6598#endif
6599 }
6600 }
6601
6602 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(a: *StutterPeriod * TotalDataReadBandwidth, b: EffectiveCompressedBufferSize);
6603#ifdef __DML_VBA_DEBUG__
6604 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6605 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6606 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6607 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6608 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6609 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6610 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6611 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6612 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6613 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6614#endif
6615
6616 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6617 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6618 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6619#ifdef __DML_VBA_DEBUG__
6620 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6621 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6622 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6623 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6624 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6625#endif
6626 StutterBurstTime = dml_max(a: StutterBurstTime, b: LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6627
6628 dml_print(
6629 "DML::%s: Time to finish residue swath=%f\n",
6630 __func__,
6631 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6632
6633 TotalActiveWriteback = 0;
6634 for (k = 0; k < NumberOfActivePlanes; ++k) {
6635 if (WritebackEnable[k]) {
6636 TotalActiveWriteback = TotalActiveWriteback + 1;
6637 }
6638 }
6639
6640 if (TotalActiveWriteback == 0) {
6641#ifdef __DML_VBA_DEBUG__
6642 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6643 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6644 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6645 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6646#endif
6647 *StutterEfficiencyNotIncludingVBlank = dml_max(a: 0., b: 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6648 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(a: 0., b: 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6649 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(a: VActiveTimeCriticalPlane / *StutterPeriod, granularity: 1) : 0);
6650 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(a: VActiveTimeCriticalPlane / *StutterPeriod, granularity: 1) : 0);
6651 } else {
6652 *StutterEfficiencyNotIncludingVBlank = 0.;
6653 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6654 *NumberOfStutterBurstsPerFrame = 0;
6655 *Z8NumberOfStutterBurstsPerFrame = 0;
6656 }
6657#ifdef __DML_VBA_DEBUG__
6658 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6659 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6660 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6661 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6662 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6663#endif
6664
6665 for (k = 0; k < NumberOfActivePlanes; ++k) {
6666 if (v->BlendingAndTiming[k] == k) {
6667 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6668 }
6669 }
6670
6671 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6672 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6673
6674 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6675 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6676 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6677 } else {
6678 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6679 }
6680 } else {
6681 *StutterEfficiency = 0;
6682 }
6683
6684 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6685 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6686 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6687 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6688 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6689 } else {
6690 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6691 }
6692 } else {
6693 *Z8StutterEfficiency = 0.;
6694 }
6695
6696 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6697 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6698 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6699 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6700 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6701 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6702 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6703 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6704}
6705
6706static void CalculateSwathAndDETConfiguration(
6707 bool ForceSingleDPP,
6708 int NumberOfActivePlanes,
6709 unsigned int DETBufferSizeInKByte,
6710 double MaximumSwathWidthLuma[],
6711 double MaximumSwathWidthChroma[],
6712 enum scan_direction_class SourceScan[],
6713 enum source_format_class SourcePixelFormat[],
6714 enum dm_swizzle_mode SurfaceTiling[],
6715 int ViewportWidth[],
6716 int ViewportHeight[],
6717 int SurfaceWidthY[],
6718 int SurfaceWidthC[],
6719 int SurfaceHeightY[],
6720 int SurfaceHeightC[],
6721 int Read256BytesBlockHeightY[],
6722 int Read256BytesBlockHeightC[],
6723 int Read256BytesBlockWidthY[],
6724 int Read256BytesBlockWidthC[],
6725 enum odm_combine_mode ODMCombineEnabled[],
6726 int BlendingAndTiming[],
6727 int BytePerPixY[],
6728 int BytePerPixC[],
6729 double BytePerPixDETY[],
6730 double BytePerPixDETC[],
6731 int HActive[],
6732 double HRatio[],
6733 double HRatioChroma[],
6734 int DPPPerPlane[],
6735 int swath_width_luma_ub[],
6736 int swath_width_chroma_ub[],
6737 double SwathWidth[],
6738 double SwathWidthChroma[],
6739 int SwathHeightY[],
6740 int SwathHeightC[],
6741 unsigned int DETBufferSizeY[],
6742 unsigned int DETBufferSizeC[],
6743 bool ViewportSizeSupportPerPlane[],
6744 bool *ViewportSizeSupport)
6745{
6746 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6747 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6748 int MinimumSwathHeightY;
6749 int MinimumSwathHeightC;
6750 int RoundedUpMaxSwathSizeBytesY;
6751 int RoundedUpMaxSwathSizeBytesC;
6752 int RoundedUpMinSwathSizeBytesY;
6753 int RoundedUpMinSwathSizeBytesC;
6754 int RoundedUpSwathSizeBytesY;
6755 int RoundedUpSwathSizeBytesC;
6756 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6757 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6758 int k;
6759
6760 CalculateSwathWidth(
6761 ForceSingleDPP,
6762 NumberOfActivePlanes,
6763 SourcePixelFormat,
6764 SourceScan,
6765 ViewportWidth,
6766 ViewportHeight,
6767 SurfaceWidthY,
6768 SurfaceWidthC,
6769 SurfaceHeightY,
6770 SurfaceHeightC,
6771 ODMCombineEnabled,
6772 BytePerPixY,
6773 BytePerPixC,
6774 Read256BytesBlockHeightY,
6775 Read256BytesBlockHeightC,
6776 Read256BytesBlockWidthY,
6777 Read256BytesBlockWidthC,
6778 BlendingAndTiming,
6779 HActive,
6780 HRatio,
6781 DPPPerPlane,
6782 SwathWidthSingleDPPY: SwathWidthSingleDPP,
6783 SwathWidthSingleDPPC: SwathWidthSingleDPPChroma,
6784 SwathWidthY: SwathWidth,
6785 SwathWidthC: SwathWidthChroma,
6786 MaximumSwathHeightY,
6787 MaximumSwathHeightC,
6788 swath_width_luma_ub,
6789 swath_width_chroma_ub);
6790
6791 *ViewportSizeSupport = true;
6792 for (k = 0; k < NumberOfActivePlanes; ++k) {
6793 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6794 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6795 if (SurfaceTiling[k] == dm_sw_linear
6796 || (SourcePixelFormat[k] == dm_444_64
6797 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6798 && SourceScan[k] != dm_vert)) {
6799 MinimumSwathHeightY = MaximumSwathHeightY[k];
6800 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6801 MinimumSwathHeightY = MaximumSwathHeightY[k];
6802 } else {
6803 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6804 }
6805 MinimumSwathHeightC = MaximumSwathHeightC[k];
6806 } else {
6807 if (SurfaceTiling[k] == dm_sw_linear) {
6808 MinimumSwathHeightY = MaximumSwathHeightY[k];
6809 MinimumSwathHeightC = MaximumSwathHeightC[k];
6810 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6811 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6812 MinimumSwathHeightC = MaximumSwathHeightC[k];
6813 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6814 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6815 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6816 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6817 MinimumSwathHeightY = MaximumSwathHeightY[k];
6818 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6819 } else {
6820 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6821 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6822 }
6823 }
6824
6825 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6826 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6827 if (SourcePixelFormat[k] == dm_420_10) {
6828 RoundedUpMaxSwathSizeBytesY = dml_ceil(a: (double) RoundedUpMaxSwathSizeBytesY, granularity: 256);
6829 RoundedUpMinSwathSizeBytesY = dml_ceil(a: (double) RoundedUpMinSwathSizeBytesY, granularity: 256);
6830 }
6831 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6832 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6833 if (SourcePixelFormat[k] == dm_420_10) {
6834 RoundedUpMaxSwathSizeBytesC = dml_ceil(a: RoundedUpMaxSwathSizeBytesC, granularity: 256);
6835 RoundedUpMinSwathSizeBytesC = dml_ceil(a: RoundedUpMinSwathSizeBytesC, granularity: 256);
6836 }
6837
6838 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6839 SwathHeightY[k] = MaximumSwathHeightY[k];
6840 SwathHeightC[k] = MaximumSwathHeightC[k];
6841 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6842 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6843 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6844 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6845 SwathHeightY[k] = MinimumSwathHeightY;
6846 SwathHeightC[k] = MaximumSwathHeightC[k];
6847 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6848 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6849 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6850 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6851 SwathHeightY[k] = MaximumSwathHeightY[k];
6852 SwathHeightC[k] = MinimumSwathHeightC;
6853 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6854 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6855 } else {
6856 SwathHeightY[k] = MinimumSwathHeightY;
6857 SwathHeightC[k] = MinimumSwathHeightC;
6858 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6859 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6860 }
6861 {
6862 double actDETBufferSizeInKByte = dml_ceil(a: DETBufferSizeInKByte, granularity: 64);
6863
6864 if (SwathHeightC[k] == 0) {
6865 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6866 DETBufferSizeC[k] = 0;
6867 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6868 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6869 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6870 } else {
6871 DETBufferSizeY[k] = dml_floor(a: actDETBufferSizeInKByte * 1024 * 2 / 3, granularity: 1024);
6872 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6873 }
6874
6875 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6876 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6877 *ViewportSizeSupport = false;
6878 ViewportSizeSupportPerPlane[k] = false;
6879 } else {
6880 ViewportSizeSupportPerPlane[k] = true;
6881 }
6882 }
6883 }
6884}
6885
6886static void CalculateSwathWidth(
6887 bool ForceSingleDPP,
6888 int NumberOfActivePlanes,
6889 enum source_format_class SourcePixelFormat[],
6890 enum scan_direction_class SourceScan[],
6891 int ViewportWidth[],
6892 int ViewportHeight[],
6893 int SurfaceWidthY[],
6894 int SurfaceWidthC[],
6895 int SurfaceHeightY[],
6896 int SurfaceHeightC[],
6897 enum odm_combine_mode ODMCombineEnabled[],
6898 int BytePerPixY[],
6899 int BytePerPixC[],
6900 int Read256BytesBlockHeightY[],
6901 int Read256BytesBlockHeightC[],
6902 int Read256BytesBlockWidthY[],
6903 int Read256BytesBlockWidthC[],
6904 int BlendingAndTiming[],
6905 int HActive[],
6906 double HRatio[],
6907 int DPPPerPlane[],
6908 double SwathWidthSingleDPPY[],
6909 double SwathWidthSingleDPPC[],
6910 double SwathWidthY[],
6911 double SwathWidthC[],
6912 int MaximumSwathHeightY[],
6913 int MaximumSwathHeightC[],
6914 int swath_width_luma_ub[],
6915 int swath_width_chroma_ub[])
6916{
6917 enum odm_combine_mode MainPlaneODMCombine;
6918 int j, k;
6919
6920#ifdef __DML_VBA_DEBUG__
6921 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6922#endif
6923
6924 for (k = 0; k < NumberOfActivePlanes; ++k) {
6925 if (SourceScan[k] != dm_vert) {
6926 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6927 } else {
6928 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6929 }
6930
6931#ifdef __DML_VBA_DEBUG__
6932 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6933 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6934#endif
6935
6936 MainPlaneODMCombine = ODMCombineEnabled[k];
6937 for (j = 0; j < NumberOfActivePlanes; ++j) {
6938 if (BlendingAndTiming[k] == j) {
6939 MainPlaneODMCombine = ODMCombineEnabled[j];
6940 }
6941 }
6942
6943 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1)
6944 SwathWidthY[k] = dml_min(a: SwathWidthSingleDPPY[k], b: dml_round(a: HActive[k] / 4.0 * HRatio[k]));
6945 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1)
6946 SwathWidthY[k] = dml_min(a: SwathWidthSingleDPPY[k], b: dml_round(a: HActive[k] / 2.0 * HRatio[k]));
6947 else if (DPPPerPlane[k] == 2)
6948 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6949 else
6950 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6951
6952#ifdef __DML_VBA_DEBUG__
6953 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6954 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6955#endif
6956
6957 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6958 SwathWidthC[k] = SwathWidthY[k] / 2;
6959 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6960 } else {
6961 SwathWidthC[k] = SwathWidthY[k];
6962 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6963 }
6964
6965 if (ForceSingleDPP == true) {
6966 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6967 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6968 }
6969 {
6970 int surface_width_ub_l = dml_ceil(a: SurfaceWidthY[k], granularity: Read256BytesBlockWidthY[k]);
6971 int surface_height_ub_l = dml_ceil(a: SurfaceHeightY[k], granularity: Read256BytesBlockHeightY[k]);
6972
6973#ifdef __DML_VBA_DEBUG__
6974 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6975#endif
6976
6977 if (SourceScan[k] != dm_vert) {
6978 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6979 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6980 swath_width_luma_ub[k] = dml_min(a: surface_width_ub_l, b: (int) dml_ceil(a: SwathWidthY[k] - 1, granularity: Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6981 if (BytePerPixC[k] > 0) {
6982 int surface_width_ub_c = dml_ceil(a: SurfaceWidthC[k], granularity: Read256BytesBlockWidthC[k]);
6983
6984 swath_width_chroma_ub[k] = dml_min(
6985 a: surface_width_ub_c,
6986 b: (int) dml_ceil(a: SwathWidthC[k] - 1, granularity: Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6987 } else {
6988 swath_width_chroma_ub[k] = 0;
6989 }
6990 } else {
6991 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6992 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6993 swath_width_luma_ub[k] = dml_min(a: surface_height_ub_l, b: (int) dml_ceil(a: SwathWidthY[k] - 1, granularity: Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6994 if (BytePerPixC[k] > 0) {
6995 int surface_height_ub_c = dml_ceil(a: SurfaceHeightC[k], granularity: Read256BytesBlockHeightC[k]);
6996
6997 swath_width_chroma_ub[k] = dml_min(
6998 a: surface_height_ub_c,
6999 b: (int) dml_ceil(a: SwathWidthC[k] - 1, granularity: Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
7000 } else {
7001 swath_width_chroma_ub[k] = 0;
7002 }
7003 }
7004 }
7005 }
7006}
7007
7008static double CalculateExtraLatency(
7009 int RoundTripPingLatencyCycles,
7010 int ReorderingBytes,
7011 double DCFCLK,
7012 int TotalNumberOfActiveDPP,
7013 int PixelChunkSizeInKByte,
7014 int TotalNumberOfDCCActiveDPP,
7015 int MetaChunkSize,
7016 double ReturnBW,
7017 bool GPUVMEnable,
7018 bool HostVMEnable,
7019 int NumberOfActivePlanes,
7020 int NumberOfDPP[],
7021 int dpte_group_bytes[],
7022 double HostVMInefficiencyFactor,
7023 double HostVMMinPageSize,
7024 int HostVMMaxNonCachedPageTableLevels)
7025{
7026 double ExtraLatencyBytes;
7027 double ExtraLatency;
7028
7029 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7030 ReorderingBytes,
7031 TotalNumberOfActiveDPP,
7032 PixelChunkSizeInKByte,
7033 TotalNumberOfDCCActiveDPP,
7034 MetaChunkSize,
7035 GPUVMEnable,
7036 HostVMEnable,
7037 NumberOfActivePlanes,
7038 NumberOfDPP,
7039 dpte_group_bytes,
7040 HostVMInefficiencyFactor,
7041 HostVMMinPageSize,
7042 HostVMMaxNonCachedPageTableLevels);
7043
7044 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
7045
7046#ifdef __DML_VBA_DEBUG__
7047 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
7048 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
7049 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
7050 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
7051 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
7052#endif
7053
7054 return ExtraLatency;
7055}
7056
/*
 * Return the number of bytes that contribute to extra latency:
 * ReorderingBytes, plus one pixel chunk per active DPP and one meta chunk per
 * DCC-active DPP (both converted from KB to bytes), plus - when GPUVM is
 * enabled - the DPTE group bytes of every DPP, scaled by
 * (1 + 8 * HostVMDynamicLevels) and HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is 0 unless both GPUVM and HostVM are enabled; otherwise
 * it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (never below 0)
 * depending on which HostVMMinPageSize range applies.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	double ret;
	int HostVMDynamicLevels = 0, k;

	if (GPUVMEnable == true && HostVMEnable == true) {
		/* Thresholds are in whatever unit HostVMMinPageSize uses (not visible here). */
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable == true) {
		for (k = 0; k < NumberOfActivePlanes; ++k)
			ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
	}
	return ret;
}
7094
/*
 * Return the urgent latency: the largest of the three supplied latencies,
 * optionally increased by
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * when DoUrgentLatencyAdjustment is set.
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double ret;

	ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
	if (DoUrgentLatencyAdjustment == true)
		ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
	return ret;
}
7111
7112static noinline_for_stack void UseMinimumDCFCLK(
7113 struct display_mode_lib *mode_lib,
7114 int MaxPrefetchMode,
7115 int ReorderingBytes)
7116{
7117 struct vba_vars_st *v = &mode_lib->vba;
7118 int dummy1, i, j, k;
7119 double NormalEfficiency, dummy2, dummy3;
7120 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7121
7122 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7123 for (i = 0; i < v->soc.num_states; ++i) {
7124 for (j = 0; j <= 1; ++j) {
7125 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7126 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7127 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7128 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7129 double MinimumTWait;
7130 double NonDPTEBandwidth;
7131 double DPTEBandwidth;
7132 double DCFCLKRequiredForAverageBandwidth;
7133 double ExtraLatencyBytes;
7134 double ExtraLatencyCycles;
7135 double DCFCLKRequiredForPeakBandwidth;
7136 int NoOfDPPState[DC__NUM_DPP__MAX];
7137 double MinimumTvmPlus2Tr0;
7138
7139 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7140 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7141 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7142 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7143 }
7144
7145 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7146 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7147
7148 MinimumTWait = CalculateTWait(PrefetchMode: MaxPrefetchMode, DRAMClockChangeLatency: v->FinalDRAMClockChangeLatency, UrgentLatency: v->UrgLatency[i], SREnterPlusExitTime: v->SREnterPlusExitTime);
7149 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7150 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7151 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7152 DCFCLKRequiredForAverageBandwidth = dml_max3(
7153 a: v->ProjectedDCFCLKDeepSleep[i][j],
7154 b: (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7155 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7156 c: (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7157
7158 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7159 ReorderingBytes,
7160 TotalNumberOfActiveDPP: v->TotalNumberOfActiveDPP[i][j],
7161 PixelChunkSizeInKByte: v->PixelChunkSizeInKByte,
7162 TotalNumberOfDCCActiveDPP: v->TotalNumberOfDCCActiveDPP[i][j],
7163 MetaChunkSize: v->MetaChunkSize,
7164 GPUVMEnable: v->GPUVMEnable,
7165 HostVMEnable: v->HostVMEnable,
7166 NumberOfActivePlanes: v->NumberOfActivePlanes,
7167 NumberOfDPP: NoOfDPPState,
7168 dpte_group_bytes: v->dpte_group_bytes,
7169 HostVMInefficiencyFactor: 1,
7170 HostVMMinPageSize: v->HostVMMinPageSize,
7171 HostVMMaxNonCachedPageTableLevels: v->HostVMMaxNonCachedPageTableLevels);
7172 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7173 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7174 double DCFCLKCyclesRequiredInPrefetch;
7175 double ExpectedPrefetchBWAcceleration;
7176 double PrefetchTime;
7177
7178 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7179 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7180 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7181 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7182 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7183 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7184 PrefetchPixelLinesTime[k] = dml_max(a: v->PrefetchLinesY[i][j][k], b: v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7185 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7186 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7187 DynamicMetadataVMExtraLatency[k] =
7188 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7189 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7190 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7191 - v->UrgLatency[i]
7192 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7193 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7194 - DynamicMetadataVMExtraLatency[k];
7195
7196 if (PrefetchTime > 0) {
7197 double ExpectedVRatioPrefetch;
7198
7199 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7200 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7201 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7202 * dml_max(a: 1.0, b: ExpectedVRatioPrefetch) * dml_max(a: 1.0, b: ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7203 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7204 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7205 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7206 }
7207 } else {
7208 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7209 }
7210 if (v->DynamicMetadataEnable[k] == true) {
7211 double TSetupPipe;
7212 double TdmbfPipe;
7213 double TdmsksPipe;
7214 double TdmecPipe;
7215 double AllowedTimeForUrgentExtraLatency;
7216
7217 CalculateVupdateAndDynamicMetadataParameters(
7218 MaxInterDCNTileRepeaters: v->MaxInterDCNTileRepeaters,
7219 DPPCLK: v->RequiredDPPCLK[i][j][k],
7220 DISPCLK: v->RequiredDISPCLK[i][j],
7221 DCFClkDeepSleep: v->ProjectedDCFCLKDeepSleep[i][j],
7222 PixelClock: v->PixelClock[k],
7223 HTotal: v->HTotal[k],
7224 VBlank: v->VTotal[k] - v->VActive[k],
7225 DynamicMetadataTransmittedBytes: v->DynamicMetadataTransmittedBytes[k],
7226 DynamicMetadataLinesBeforeActiveRequired: v->DynamicMetadataLinesBeforeActiveRequired[k],
7227 InterlaceEnable: v->Interlace[k],
7228 ProgressiveToInterlaceUnitInOPP: v->ProgressiveToInterlaceUnitInOPP,
7229 TSetup: &TSetupPipe,
7230 Tdmbf: &TdmbfPipe,
7231 Tdmec: &TdmecPipe,
7232 Tdmsks: &TdmsksPipe,
7233 VUpdateOffsetPix: &dummy1,
7234 VUpdateWidthPix: &dummy2,
7235 VReadyOffsetPix: &dummy3);
7236 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7237 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7238 if (AllowedTimeForUrgentExtraLatency > 0) {
7239 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7240 a: DCFCLKRequiredForPeakBandwidthPerPlane[k],
7241 b: ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7242 } else {
7243 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7244 }
7245 }
7246 }
7247 DCFCLKRequiredForPeakBandwidth = 0;
7248 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
7249 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7250
7251 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7252 * (v->GPUVMEnable == true ?
7253 (v->HostVMEnable == true ?
7254 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7255 0);
7256 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7257 double MaximumTvmPlus2Tr0PlusTsw;
7258
7259 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7260 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7261 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7262 } else {
7263 DCFCLKRequiredForPeakBandwidth = dml_max3(
7264 a: DCFCLKRequiredForPeakBandwidth,
7265 b: 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7266 c: (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7267 }
7268 }
7269 v->DCFCLKState[i][j] = dml_min(a: v->DCFCLKPerState[i], b: 1.05 * dml_max(a: DCFCLKRequiredForAverageBandwidth, b: DCFCLKRequiredForPeakBandwidth));
7270 }
7271 }
7272}
7273
7274static void CalculateUnboundedRequestAndCompressedBufferSize(
7275 unsigned int DETBufferSizeInKByte,
7276 int ConfigReturnBufferSizeInKByte,
7277 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7278 int TotalActiveDPP,
7279 bool NoChromaPlanes,
7280 int MaxNumDPP,
7281 int CompressedBufferSegmentSizeInkByteFinal,
7282 enum output_encoder_class *Output,
7283 bool *UnboundedRequestEnabled,
7284 int *CompressedBufferSizeInkByte)
7285{
7286 double actDETBufferSizeInKByte = dml_ceil(a: DETBufferSizeInKByte, granularity: 64);
7287
7288 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalNumberOfActiveDPP: TotalActiveDPP, NoChroma: NoChromaPlanes, Output: Output[0]);
7289 *CompressedBufferSizeInkByte = (
7290 *UnboundedRequestEnabled == true ?
7291 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7292 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7293 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7294
7295#ifdef __DML_VBA_DEBUG__
7296 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7297 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7298 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7299 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7300 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7301 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7302 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7303#endif
7304}
7305
7306static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7307{
7308 bool ret_val = false;
7309
7310 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7311 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
7312 ret_val = false;
7313 return ret_val;
7314}
7315
/*
 * Return the maximum VStartup value in lines, capped at 1023.
 *
 * The usable vblank is the smaller of the actual vblank (VTotal - VActive)
 * and the nominal vblank: VBlankNom when it is non-zero, otherwise
 * VBlankNomDefaultUS converted to lines with the line time HTotal / PixelClock.
 *
 * For interlaced timing without progressive-to-interlace conversion in the
 * OPP the result is half of the usable vblank (rounded down). Otherwise the
 * writeback delay, rounded up to whole lines and at least one line, is
 * subtracted from the usable vblank.
 */
static unsigned int CalculateMaxVStartup(
		unsigned int VTotal,
		unsigned int VActive,
		unsigned int VBlankNom,
		unsigned int HTotal,
		double PixelClock,
		bool ProgressiveTointerlaceUnitinOPP,
		bool Interlace,
		unsigned int VBlankNomDefaultUS,
		double WritebackDelayTime)
{
	unsigned int MaxVStartup = 0;
	unsigned int vblank_size = 0;
	double line_time_us = HTotal / PixelClock;
	unsigned int vblank_actual = VTotal - VActive;
	unsigned int vblank_nom_default_in_line = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0);
	unsigned int vblank_nom_input = VBlankNom; //dml_min(VBlankNom, vblank_nom_default_in_line);
	unsigned int vblank_avail = vblank_nom_input == 0 ? vblank_nom_default_in_line : vblank_nom_input;

	vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail);
	if (Interlace && !ProgressiveTointerlaceUnitinOPP)
		MaxVStartup = dml_floor(vblank_size / 2.0, 1.0);
	else
		/*
		 * NOTE(review): the subtraction is done in double; if the writeback
		 * term exceeds vblank_size the value is negative before it is
		 * converted to unsigned int - confirm callers never reach this.
		 */
		MaxVStartup = vblank_size - dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0));
	if (MaxVStartup > 1023)
		MaxVStartup = 1023;
	return MaxVStartup;
}
7344

/* Source: linux/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c */