Skip to content

Commit c8d7551

Browse files
committed
drm/vc4: Skip input lines when doing a large downscale
The TPZ filter ideally wants 3 lines of image to give good-quality downscaling. Reading more than that consumes excessive SDRAM bandwidth for no gain. If the downsample factor allows for it, skip input lines by reducing the programmed image height and increasing the pitch to compensate. This does not currently handle T-format images, which need to be configured slightly differently. Signed-off-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
1 parent 8b22b5c commit c8d7551

3 files changed

Lines changed: 59 additions & 13 deletions

File tree

drivers/gpu/drm/vc4/vc4_drv.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,11 @@ struct vc4_plane_state {
480480
*/
481481
bool is_yuv444_unity;
482482

483+
/* Skip lines on large downscales to avoid consuming too much SDRAM
484+
* bandwidth
485+
*/
486+
unsigned int vdownsample;
487+
483488
/* Our allocation in LBM for temporary storage during scaling. */
484489
unsigned int lbm_handle;
485490

drivers/gpu/drm/vc4/vc4_plane.c

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -935,7 +935,7 @@ static void vc4_write_scaling_parameters(struct drm_plane_state *state,
935935

936936
/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
937937
if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
938-
vc4_write_tpz(vc4_state, vc4_state->src_h[channel],
938+
vc4_write_tpz(vc4_state, vc4_state->src_h[channel] / vc4_state->vdownsample,
939939
vc4_state->crtc_h);
940940
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
941941
}
@@ -1361,6 +1361,17 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
13611361
return 0;
13621362
}
13631363

1364+
if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ &&
1365+
vc4_state->src_h[0] / vc4_state->crtc_h > (3 << 16)) {
1366+
/* Downscaling by more than x3. Reduce the number of lines read
1367+
* to avoid exceeding SDRAM bandwidth.
1368+
*/
1369+
vc4_state->vdownsample = ((vc4_state->src_h[0] /
1370+
(vc4_state->crtc_h * 3)) >> 16) + 1;
1371+
} else {
1372+
vc4_state->vdownsample = 1;
1373+
}
1374+
13641375
width = vc4_state->src_w[0] >> 16;
13651376
height = vc4_state->src_h[0] >> 16;
13661377

@@ -1406,6 +1417,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
14061417
break;
14071418

14081419
case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
1420+
/* Line skipping decimation currently not supported for T-format */
1421+
vc4_state->vdownsample = 1;
1422+
14091423
u32 tile_size_shift = 12; /* T tiles are 4kb */
14101424
/* Whole-tile offsets, mostly for setting the pitch. */
14111425
u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
@@ -1558,6 +1572,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
15581572
offsets[i] += pitch[i] * tile * tile_width;
15591573
offsets[i] += src_y / (i ? v_subsample : 1) * tile_width;
15601574
offsets[i] += x_off & ~(i ? 1 : 0);
1575+
1576+
pitch[i] |= VC4_SET_FIELD(vc4_state->vdownsample - 1,
1577+
SCALER_TILE_SKIP_0);
15611578
}
15621579
break;
15631580
}
@@ -1635,7 +1652,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
16351652
(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
16361653
vc4_hvs4_get_alpha_blend_mode(state) |
16371654
VC4_SET_FIELD(width, SCALER_POS2_WIDTH) |
1638-
VC4_SET_FIELD(height, SCALER_POS2_HEIGHT));
1655+
VC4_SET_FIELD(height / vc4_state->vdownsample,
1656+
SCALER_POS2_HEIGHT));
16391657

16401658
/* Position Word 3: Context. Written by the HVS. */
16411659
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
@@ -1689,7 +1707,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
16891707
vc4_state->pos2_offset = vc4_state->dlist_count;
16901708
vc4_dlist_write(vc4_state,
16911709
VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) |
1692-
VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT));
1710+
VC4_SET_FIELD(height / vc4_state->vdownsample,
1711+
SCALER5_POS2_HEIGHT));
16931712

16941713
/* Position Word 3: Context. Written by the HVS. */
16951714
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
@@ -1713,18 +1732,22 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
17131732
vc4_dlist_write(vc4_state, 0xc0c0c0c0);
17141733

17151734
/* Pitch word 0 */
1716-
vc4_dlist_write(vc4_state, pitch[0]);
1735+
if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
1736+
hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT)
1737+
vc4_dlist_write(vc4_state, pitch[0] * vc4_state->vdownsample);
1738+
else
1739+
vc4_dlist_write(vc4_state, pitch[0]);
17171740

17181741
/* Pitch word 1/2 */
17191742
for (i = 1; i < num_planes; i++) {
17201743
if (hvs_format != HVS_PIXEL_FORMAT_H264 &&
1721-
hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) {
1744+
hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT)
17221745
vc4_dlist_write(vc4_state,
1723-
VC4_SET_FIELD(fb->pitches[i],
1746+
VC4_SET_FIELD(fb->pitches[i] /
1747+
vc4_state->vdownsample,
17241748
SCALER_SRC_PITCH));
1725-
} else {
1726-
vc4_dlist_write(vc4_state, pitch[1]);
1727-
}
1749+
else
1750+
vc4_dlist_write(vc4_state, pitch[i]);
17281751
}
17291752

17301753
/* Colorspace conversion words */
@@ -1938,6 +1961,17 @@ static int vc6_plane_mode_set(struct drm_plane *plane,
19381961
width = vc4_state->src_w[0] >> 16;
19391962
height = vc4_state->src_h[0] >> 16;
19401963

1964+
if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ &&
1965+
vc4_state->src_h[0] / vc4_state->crtc_h > (3 << 16)) {
1966+
/* Downscaling by more than x3. Reduce the number of lines read
1967+
* to avoid exceeding SDRAM bandwidth.
1968+
*/
1969+
vc4_state->vdownsample = ((vc4_state->src_h[0] /
1970+
(vc4_state->crtc_h * 3)) >> 16) + 1;
1971+
} else {
1972+
vc4_state->vdownsample = 1;
1973+
}
1974+
19411975
/* SCL1 is used for Cb/Cr scaling of planar formats. For RGB
19421976
* and 4:4:4, scl1 should be set to scl0 so both channels of
19431977
* the scaler do the same thing. For YUV, the Y plane needs
@@ -2090,8 +2124,11 @@ static int vc6_plane_mode_set(struct drm_plane *plane,
20902124
* Finished using the pitch as a pitch, so pack it as the
20912125
* register value.
20922126
*/
2093-
pitch[i] = VC4_SET_FIELD(pitch[i], SCALER6_PTR2_PITCH) |
2094-
VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT);
2127+
pitch[i] = VC4_SET_FIELD(pitch[i], SCALER6_PTR2_TILE_HEIGHT) |
2128+
VC4_SET_FIELD(fetch_count - 1,
2129+
SCALER6_PTR2_TILE_FETCH_COUNT) |
2130+
VC4_SET_FIELD(vc4_state->vdownsample - 1,
2131+
SCALER6_PTR2_TILE_LSKIP);
20952132
}
20962133

20972134
break;
@@ -2152,7 +2189,7 @@ static int vc6_plane_mode_set(struct drm_plane *plane,
21522189
/* Position Word 2: Source Image Size */
21532190
vc4_state->pos2_offset = vc4_state->dlist_count;
21542191
vc4_dlist_write(vc4_state,
2155-
VC4_SET_FIELD(height - 1,
2192+
VC4_SET_FIELD((height / vc4_state->vdownsample) - 1,
21562193
SCALER6_POS2_SRC_LINES) |
21572194
VC4_SET_FIELD(width - 1,
21582195
SCALER6_POS2_SRC_WIDTH));
@@ -2187,7 +2224,7 @@ static int vc6_plane_mode_set(struct drm_plane *plane,
21872224
if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 &&
21882225
base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) {
21892226
vc4_dlist_write(vc4_state,
2190-
VC4_SET_FIELD(fb->pitches[i],
2227+
VC4_SET_FIELD((fb->pitches[i] * vc4_state->vdownsample),
21912228
SCALER6_PTR2_PITCH));
21922229
} else {
21932230
vc4_dlist_write(vc4_state, pitch[i]);

drivers/gpu/drm/vc4/vc4_regs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,4 +1415,8 @@ enum hvs_pixel_format {
14151415
#define SCALER6_PTR2_PITCH_MASK VC4_MASK(16, 0)
14161416
#define SCALER6_PTR2_FETCH_COUNT_MASK VC4_MASK(26, 16)
14171417

1418+
#define SCALER6_PTR2_TILE_LSKIP_MASK VC4_MASK(31, 29)
1419+
#define SCALER6_PTR2_TILE_FETCH_COUNT_MASK VC4_MASK(26, 16)
1420+
#define SCALER6_PTR2_TILE_HEIGHT_MASK VC4_MASK(16, 0)
1421+
14181422
#endif /* VC4_REGS_H */

0 commit comments

Comments
 (0)