From f427fb16cf756548c39256b569cf083f39bcc4e9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Dec 2015 14:14:09 -0800 Subject: [PATCH 01/10] drm/vc4: Improve comments on vc4_plane_state members. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 0addbad15832..45e353d65c3d 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -26,16 +26,19 @@ struct vc4_plane_state { struct drm_plane_state base; + /* System memory copy of the display list for this element, computed + * at atomic_check time. + */ u32 *dlist; - u32 dlist_size; /* Number of dwords in allocated for the display list */ + u32 dlist_size; /* Number of dwords allocated for the display list */ u32 dlist_count; /* Number of used dwords in the display list. */ /* Offset in the dlist to pointer word 0. */ u32 pw0_offset; /* Offset where the plane's dlist was last stored in the - hardware at vc4_crtc_atomic_flush() time. - */ + * hardware at vc4_crtc_atomic_flush() time. + */ u32 *hw_dlist; }; From 17eac75111ebda33e13d8d8d98aaedfc1a9c2abf Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Dec 2015 14:14:57 -0800 Subject: [PATCH 02/10] drm/vc4: Add missing __iomem annotation to hw_dlist. This is the pointer to the HVS device's memory where we stored the contents of *dlist. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 45e353d65c3d..ed07ee57e1bf 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -39,7 +39,7 @@ struct vc4_plane_state { /* Offset where the plane's dlist was last stored in the * hardware at vc4_crtc_atomic_flush() time. 
*/ - u32 *hw_dlist; + u32 __iomem *hw_dlist; }; static inline struct vc4_plane_state * From 5c6799942003df91801b1d2277bba34d71f99603 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Dec 2015 14:34:44 -0800 Subject: [PATCH 03/10] drm/vc4: Move the plane clipping/scaling setup to a separate function. As we add actual scaling, this is going to get way more complicated. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 78 ++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index ed07ee57e1bf..554ed54cc8a7 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -40,6 +40,14 @@ struct vc4_plane_state { * hardware at vc4_crtc_atomic_flush() time. */ u32 __iomem *hw_dlist; + + /* Clipped coordinates of the plane on the display. */ + int crtc_x, crtc_y, crtc_w, crtc_h; + + /* Offset to start scanning out from the start of the plane's + * BO. + */ + u32 offset; }; static inline struct vc4_plane_state * @@ -151,22 +159,17 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) vc4_state->dlist[vc4_state->dlist_count++] = val; } -/* Writes out a full display list for an active plane to the plane's - * private dlist state. 
- */ -static int vc4_plane_mode_set(struct drm_plane *plane, - struct drm_plane_state *state) +static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; - struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); - u32 ctl0_offset = vc4_state->dlist_count; - const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format); - uint32_t offset = fb->offsets[0]; - int crtc_x = state->crtc_x; - int crtc_y = state->crtc_y; - int crtc_w = state->crtc_w; - int crtc_h = state->crtc_h; + + vc4_state->offset = fb->offsets[0]; + + vc4_state->crtc_x = state->crtc_x; + vc4_state->crtc_y = state->crtc_y; + vc4_state->crtc_w = state->crtc_w; + vc4_state->crtc_h = state->crtc_h; if (state->crtc_w << 16 != state->src_w || state->crtc_h << 16 != state->src_h) { @@ -178,18 +181,41 @@ static int vc4_plane_mode_set(struct drm_plane *plane, return -EINVAL; } - if (crtc_x < 0) { - offset += drm_format_plane_cpp(fb->pixel_format, 0) * -crtc_x; - crtc_w += crtc_x; - crtc_x = 0; + if (vc4_state->crtc_x < 0) { + vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format, + 0) * + -vc4_state->crtc_x); + vc4_state->crtc_w += vc4_state->crtc_x; + vc4_state->crtc_x = 0; } - if (crtc_y < 0) { - offset += fb->pitches[0] * -crtc_y; - crtc_h += crtc_y; - crtc_y = 0; + if (vc4_state->crtc_y < 0) { + vc4_state->offset += fb->pitches[0] * -vc4_state->crtc_y; + vc4_state->crtc_h += vc4_state->crtc_y; + vc4_state->crtc_y = 0; } + return 0; +} + + +/* Writes out a full display list for an active plane to the plane's + * private dlist state. 
+ */ +static int vc4_plane_mode_set(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + struct drm_framebuffer *fb = state->fb; + struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); + u32 ctl0_offset = vc4_state->dlist_count; + const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format); + int ret; + + ret = vc4_plane_setup_clipping_and_scaling(state); + if (ret) + return ret; + vc4_dlist_write(vc4_state, SCALER_CTL0_VALID | (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | @@ -199,8 +225,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 0: Image Positions and Alpha Value */ vc4_dlist_write(vc4_state, VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) | - VC4_SET_FIELD(crtc_x, SCALER_POS0_START_X) | - VC4_SET_FIELD(crtc_y, SCALER_POS0_START_Y)); + VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | + VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); /* Position Word 1: Scaled Image Dimensions. * Skipped due to SCALER_CTL0_UNITY scaling. @@ -212,8 +238,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | - VC4_SET_FIELD(crtc_w, SCALER_POS2_WIDTH) | - VC4_SET_FIELD(crtc_h, SCALER_POS2_HEIGHT)); + VC4_SET_FIELD(vc4_state->crtc_w, SCALER_POS2_WIDTH) | + VC4_SET_FIELD(vc4_state->crtc_h, SCALER_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. 
*/ vc4_dlist_write(vc4_state, 0xc0c0c0c0); @@ -221,7 +247,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_state->pw0_offset = vc4_state->dlist_count; /* Pointer Word 0: RGB / Y Pointer */ - vc4_dlist_write(vc4_state, bo->paddr + offset); + vc4_dlist_write(vc4_state, bo->paddr + vc4_state->offset); /* Pointer Context Word 0: Written by the HVS */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); From 6674a904d68041d982ffb284d2827410765a097a Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 30 Dec 2015 11:50:22 -0800 Subject: [PATCH 04/10] drm/vc4: Add a proper short-circuit path for legacy cursor updates. Previously, on every modeset we would allocate new display list memory, recompute changed planes, write all of them to the new memory, and point scanout at the new list (which will latch approximately at the next line of scanout). We let drm_atomic_helper_wait_for_vblanks() decide whether we needed to wait for a vblank after a modeset before cleaning up the old state and letting the next modeset proceed, and on legacy cursor updates we wouldn't wait. If you moved the cursor fast enough, we could potentially wrap around the display list memory area and overwrite the existing display list while it was still being scanned out, resulting in the HVS scanning out garbage or just halting. Instead of making cursor updates wait for scanout to move to the new display list area (which introduces significant cursor lag in X), we just rewrite our current display list.
Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_kms.c | 9 ++++ drivers/gpu/drm/vc4/vc4_plane.c | 94 ++++++++++++++++++++++++++++++--- 2 files changed, 96 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index f95f2df5f8d1..4718ae5176cc 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -49,6 +49,15 @@ vc4_atomic_complete_commit(struct vc4_commit *c) drm_atomic_helper_commit_modeset_enables(dev, state); + /* Make sure that drm_atomic_helper_wait_for_vblanks() + * actually waits for vblank. If we're doing a full atomic + * modeset (as opposed to a vc4_update_plane() short circuit), + * then we need to wait for scanout to be done with our + * display lists before we free it and potentially reallocate + * and overwrite the dlist memory with a new modeset. + */ + state->legacy_cursor_update = false; + drm_atomic_helper_wait_for_vblanks(dev, state); drm_atomic_helper_cleanup_planes(dev, state); diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 554ed54cc8a7..713ec006baa9 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -33,8 +33,12 @@ struct vc4_plane_state { u32 dlist_size; /* Number of dwords allocated for the display list */ u32 dlist_count; /* Number of used dwords in the display list. */ - /* Offset in the dlist to pointer word 0. */ - u32 pw0_offset; + /* Offset in the dlist to various words, for pageflip or + * cursor updates. + */ + u32 pos0_offset; + u32 pos2_offset; + u32 ptr0_offset; /* Offset where the plane's dlist was last stored in the * hardware at vc4_crtc_atomic_flush() time. 
@@ -223,6 +227,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_CTL0_UNITY); /* Position Word 0: Image Positions and Alpha Value */ + vc4_state->pos0_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) | VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | @@ -233,6 +238,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, */ /* Position Word 2: Source Image Size, Alpha Mode */ + vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, VC4_SET_FIELD(format->has_alpha ? SCALER_POS2_ALPHA_MODE_PIPELINE : @@ -244,9 +250,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); - vc4_state->pw0_offset = vc4_state->dlist_count; - /* Pointer Word 0: RGB / Y Pointer */ + vc4_state->ptr0_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, bo->paddr + vc4_state->offset); /* Pointer Context Word 0: Written by the HVS */ @@ -332,13 +337,13 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) * scanout will start from this address as soon as the FIFO * needs to refill with pixels. */ - writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]); + writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]); /* Also update the CPU-side dlist copy, so that any later * atomic updates that don't do a new modeset on our plane * also use our updated address. */ - vc4_state->dlist[vc4_state->pw0_offset] = addr; + vc4_state->dlist[vc4_state->ptr0_offset] = addr; } static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { @@ -354,8 +359,83 @@ static void vc4_plane_destroy(struct drm_plane *plane) drm_plane_cleanup(plane); } +/* Implements immediate (non-vblank-synced) updates of the cursor + * position, or falls back to the atomic helper otherwise. 
+ */ +static int +vc4_update_plane(struct drm_plane *plane, + struct drm_crtc *crtc, + struct drm_framebuffer *fb, + int crtc_x, int crtc_y, + unsigned int crtc_w, unsigned int crtc_h, + uint32_t src_x, uint32_t src_y, + uint32_t src_w, uint32_t src_h) +{ + struct drm_plane_state *plane_state; + struct vc4_plane_state *vc4_state; + + if (plane != crtc->cursor) + goto out; + + plane_state = plane->state; + vc4_state = to_vc4_plane_state(plane_state); + + if (!plane_state) + goto out; + + /* If we're changing the cursor contents, do that in the + * normal vblank-synced atomic path. + */ + if (fb != plane_state->fb) + goto out; + + /* No configuring new scaling in the fast path. */ + if (crtc_w != plane_state->crtc_w || + crtc_h != plane_state->crtc_h || + src_w != plane_state->src_w || + src_h != plane_state->src_h) { + goto out; + } + + /* Set the cursor's position on the screen. This is the + * expected change from the drm_mode_cursor_universal() + * helper. + */ + plane_state->crtc_x = crtc_x; + plane_state->crtc_y = crtc_y; + + /* Allow changing the start position within the cursor BO, if + * that matters. + */ + plane_state->src_x = src_x; + plane_state->src_y = src_y; + + /* Update the display list based on the new crtc_x/y. */ + vc4_plane_atomic_check(plane, plane_state); + + /* Note that we can't just call vc4_plane_write_dlist() + * because that would smash the context data that the HVS is + * currently using. 
+ */ + writel(vc4_state->dlist[vc4_state->pos0_offset], + &vc4_state->hw_dlist[vc4_state->pos0_offset]); + writel(vc4_state->dlist[vc4_state->pos2_offset], + &vc4_state->hw_dlist[vc4_state->pos2_offset]); + writel(vc4_state->dlist[vc4_state->ptr0_offset], + &vc4_state->hw_dlist[vc4_state->ptr0_offset]); + + return 0; + +out: + return drm_atomic_helper_update_plane(plane, crtc, fb, + crtc_x, crtc_y, + crtc_w, crtc_h, + src_x, src_y, + src_w, src_h); +} + static const struct drm_plane_funcs vc4_plane_funcs = { - .update_plane = drm_atomic_helper_update_plane, + .update_plane = vc4_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = vc4_plane_destroy, .set_property = NULL, From d8dbf44f13b91185c618219d912b246817a8d132 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Dec 2015 13:25:41 -0800 Subject: [PATCH 05/10] drm/vc4: Make the CRTCs cooperate on allocating display lists. So far, we've only ever lit up one CRTC, so this has been fine. To extend to more displays or more planes, we need to make sure we don't run our display lists into each other. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 111 ++++++++++++++++++--------------- drivers/gpu/drm/vc4/vc4_drv.h | 8 ++- drivers/gpu/drm/vc4/vc4_hvs.c | 13 ++++ 3 files changed, 82 insertions(+), 50 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 937409792b97..6cf931557e97 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -49,22 +49,27 @@ struct vc4_crtc { /* Which HVS channel we're using for our CRTC. */ int channel; - /* Pointer to the actual hardware display list memory for the - * crtc. - */ - u32 __iomem *dlist; - - u32 dlist_size; /* in dwords */ - struct drm_pending_vblank_event *event; }; +struct vc4_crtc_state { + struct drm_crtc_state base; + /* Dlist area for this CRTC configuration. 
*/ + struct drm_mm_node mm; +}; + static inline struct vc4_crtc * to_vc4_crtc(struct drm_crtc *crtc) { return (struct vc4_crtc *)crtc; } +static inline struct vc4_crtc_state * +to_vc4_crtc_state(struct drm_crtc_state *crtc_state) +{ + return (struct vc4_crtc_state *)crtc_state; +} + struct vc4_crtc_data { /* Which channel of the HVS this pixelvalve sources from. */ int hvs_channel; @@ -319,11 +324,13 @@ static void vc4_crtc_enable(struct drm_crtc *crtc) static int vc4_crtc_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state) { + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state); struct drm_device *dev = crtc->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); struct drm_plane *plane; - struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + unsigned long flags; u32 dlist_count = 0; + int ret; /* The pixelvalve can only feed one encoder (and encoders are * 1:1 with connectors.) @@ -346,18 +353,12 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc, dlist_count++; /* Account for SCALER_CTL0_END. 
*/ - if (!vc4_crtc->dlist || dlist_count > vc4_crtc->dlist_size) { - vc4_crtc->dlist = ((u32 __iomem *)vc4->hvs->dlist + - HVS_BOOTLOADER_DLIST_END); - vc4_crtc->dlist_size = ((SCALER_DLIST_SIZE >> 2) - - HVS_BOOTLOADER_DLIST_END); - - if (dlist_count > vc4_crtc->dlist_size) { - DRM_DEBUG_KMS("dlist too large for CRTC (%d > %d).\n", - dlist_count, vc4_crtc->dlist_size); - return -EINVAL; - } - } + spin_lock_irqsave(&vc4->hvs->mm_lock, flags); + ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm, + dlist_count, 1, 0); + spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags); + if (ret) + return ret; return 0; } @@ -368,47 +369,29 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_device *dev = crtc->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); struct drm_plane *plane; bool debug_dump_regs = false; - u32 __iomem *dlist_next = vc4_crtc->dlist; + u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start; + u32 __iomem *dlist_next = dlist_start; if (debug_dump_regs) { DRM_INFO("CRTC %d HVS before:\n", drm_crtc_index(crtc)); vc4_hvs_dump_state(dev); } - /* Copy all the active planes' dlist contents to the hardware dlist. - * - * XXX: If the new display list was large enough that it - * overlapped a currently-read display list, we need to do - * something like disable scanout before putting in the new - * list. For now, we're safe because we only have the two - * planes. - */ + /* Copy all the active planes' dlist contents to the hardware dlist. */ drm_atomic_crtc_for_each_plane(plane, crtc) { dlist_next += vc4_plane_write_dlist(plane, dlist_next); } - if (dlist_next == vc4_crtc->dlist) { - /* If no planes were enabled, use the SCALER_CTL0_END - * at the start of the display list memory (in the - * bootloader section). We'll rewrite that - * SCALER_CTL0_END, just in case, though. 
- */ - writel(SCALER_CTL0_END, vc4->hvs->dlist); - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), 0); - } else { - writel(SCALER_CTL0_END, dlist_next); - dlist_next++; + writel(SCALER_CTL0_END, dlist_next); + dlist_next++; - HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), - (u32 __iomem *)vc4_crtc->dlist - - (u32 __iomem *)vc4->hvs->dlist); + WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size); - /* Make the next display list start after ours. */ - vc4_crtc->dlist_size -= (dlist_next - vc4_crtc->dlist); - vc4_crtc->dlist = dlist_next; - } + HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), + vc4_state->mm.start); if (debug_dump_regs) { DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); @@ -573,6 +556,36 @@ static int vc4_page_flip(struct drm_crtc *crtc, return drm_atomic_helper_page_flip(crtc, fb, event, flags); } +static struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc) +{ + struct vc4_crtc_state *vc4_state; + + vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); + if (!vc4_state) + return NULL; + + __drm_atomic_helper_crtc_duplicate_state(crtc, &vc4_state->base); + return &vc4_state->base; +} + +static void vc4_crtc_destroy_state(struct drm_crtc *crtc, + struct drm_crtc_state *state) +{ + struct vc4_dev *vc4 = to_vc4_dev(crtc->dev); + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state); + + if (vc4_state->mm.allocated) { + unsigned long flags; + + spin_lock_irqsave(&vc4->hvs->mm_lock, flags); + drm_mm_remove_node(&vc4_state->mm); + spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags); + + } + + __drm_atomic_helper_crtc_destroy_state(crtc, state); +} + static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .destroy = vc4_crtc_destroy, @@ -581,8 +594,8 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ .reset = drm_atomic_helper_crtc_reset, - 
.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, - .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .atomic_duplicate_state = vc4_crtc_duplicate_state, + .atomic_destroy_state = vc4_crtc_destroy_state, }; static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 4c734d087d7f..ae9802486080 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -149,7 +149,13 @@ struct vc4_v3d { struct vc4_hvs { struct platform_device *pdev; void __iomem *regs; - void __iomem *dlist; + u32 __iomem *dlist; + + /* Memory manager for CRTCs to allocate space in the display + * list. Units are dwords. + */ + struct drm_mm dlist_mm; + spinlock_t mm_lock; }; struct vc4_plane { diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 8098c5b21ba4..9e435545b3b6 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -119,6 +119,17 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) hvs->dlist = hvs->regs + SCALER_DLIST_START; + spin_lock_init(&hvs->mm_lock); + + /* Set up the HVS display list memory manager. We never + * overwrite the setup from the bootloader (just 128b out of + * our 16K), since we don't want to scramble the screen when + * transitioning from the firmware's boot setup to runtime. 
+ */ + drm_mm_init(&hvs->dlist_mm, + HVS_BOOTLOADER_DLIST_END, + (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END); + vc4->hvs = hvs; return 0; } @@ -129,6 +140,8 @@ static void vc4_hvs_unbind(struct device *dev, struct device *master, struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = drm->dev_private; + drm_mm_takedown(&vc4->hvs->dlist_mm); + vc4->hvs = NULL; } From fc2d6f1eabee9d971453da2a27a72471c2a347dd Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 20 Oct 2015 14:18:56 +0100 Subject: [PATCH 06/10] drm/vc4: Add more display planes to each CRTC. Previously we only did the primary and cursor plane, but overlay planes are useful and just require this setup to add, since all planes go into the HVS display list in the same way. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_crtc.c | 56 ++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 6cf931557e97..619dc781c517 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -657,9 +657,9 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_crtc *vc4_crtc; struct drm_crtc *crtc; - struct drm_plane *primary_plane, *cursor_plane; + struct drm_plane *primary_plane, *cursor_plane, *destroy_plane, *temp; const struct of_device_id *match; - int ret; + int ret, i; vc4_crtc = devm_kzalloc(dev, sizeof(*vc4_crtc), GFP_KERNEL); if (!vc4_crtc) @@ -688,27 +688,49 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) goto err; } - cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR); - if (IS_ERR(cursor_plane)) { - dev_err(dev, "failed to construct cursor plane\n"); - ret = PTR_ERR(cursor_plane); - goto err_primary; - } - - drm_crtc_init_with_planes(drm, crtc, primary_plane, cursor_plane, + drm_crtc_init_with_planes(drm, crtc, primary_plane, 
NULL, &vc4_crtc_funcs, NULL); drm_crtc_helper_add(crtc, &vc4_crtc_helper_funcs); primary_plane->crtc = crtc; - cursor_plane->crtc = crtc; vc4->crtc[drm_crtc_index(crtc)] = vc4_crtc; vc4_crtc->channel = vc4_crtc->data->hvs_channel; + /* Set up some arbitrary number of planes. We're not limited + * by a set number of physical registers, just the space in + * the HVS (16k) and how small a plane can be (28 bytes). + * However, each plane we set up takes up some memory, and + * increases the cost of looping over planes, which atomic + * modesetting does quite a bit. As a result, we pick a + * modest number of planes to expose, that should hopefully + * still cover any sane usecase. + */ + for (i = 0; i < 8; i++) { + struct drm_plane *plane = + vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY); + + if (IS_ERR(plane)) + continue; + + plane->possible_crtcs = 1 << drm_crtc_index(crtc); + } + + /* Set up the legacy cursor after overlay initialization, + * since we overlay planes on the CRTC in the order they were + * initialized.
+ */ + cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR); + if (!IS_ERR(cursor_plane)) { + cursor_plane->possible_crtcs = 1 << drm_crtc_index(crtc); + cursor_plane->crtc = crtc; + crtc->cursor = cursor_plane; + } + CRTC_WRITE(PV_INTEN, 0); CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START); ret = devm_request_irq(dev, platform_get_irq(pdev, 0), vc4_crtc_irq_handler, 0, "vc4 crtc", vc4_crtc); if (ret) - goto err_cursor; + goto err_destroy_planes; vc4_set_crtc_possible_masks(drm, crtc); @@ -716,10 +738,12 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) return 0; -err_cursor: - cursor_plane->funcs->destroy(cursor_plane); -err_primary: - primary_plane->funcs->destroy(primary_plane); +err_destroy_planes: + list_for_each_entry_safe(destroy_plane, temp, + &drm->mode_config.plane_list, head) { + if (destroy_plane->possible_crtcs == 1 << drm_crtc_index(crtc)) + destroy_plane->funcs->destroy(destroy_plane); + } err: return ret; } From f863e356013d628fa65b1cd89aa298eed26fc936 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 28 Dec 2015 14:45:25 -0800 Subject: [PATCH 07/10] drm/vc4: Fix which value is being used for source image size. This doesn't matter yet since we only allow 1:1 scaling, but the comment clearly says we should be using the source size. Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 713ec006baa9..d9c929096164 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -47,6 +47,8 @@ struct vc4_plane_state { /* Clipped coordinates of the plane on the display. */ int crtc_x, crtc_y, crtc_w, crtc_h; + /* Clipped size of the area scanned from in the FB. */ + u32 src_w, src_h; /* Offset to start scanning out from the start of the plane's * BO. 
@@ -170,11 +172,6 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_state->offset = fb->offsets[0]; - vc4_state->crtc_x = state->crtc_x; - vc4_state->crtc_y = state->crtc_y; - vc4_state->crtc_w = state->crtc_w; - vc4_state->crtc_h = state->crtc_h; - if (state->crtc_w << 16 != state->src_w || state->crtc_h << 16 != state->src_h) { /* We don't support scaling yet, which involves @@ -185,17 +182,25 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) return -EINVAL; } + vc4_state->src_w = state->src_w >> 16; + vc4_state->src_h = state->src_h >> 16; + + vc4_state->crtc_x = state->crtc_x; + vc4_state->crtc_y = state->crtc_y; + vc4_state->crtc_w = state->crtc_w; + vc4_state->crtc_h = state->crtc_h; + if (vc4_state->crtc_x < 0) { vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format, 0) * -vc4_state->crtc_x); - vc4_state->crtc_w += vc4_state->crtc_x; + vc4_state->src_w += vc4_state->crtc_x; vc4_state->crtc_x = 0; } if (vc4_state->crtc_y < 0) { vc4_state->offset += fb->pitches[0] * -vc4_state->crtc_y; - vc4_state->crtc_h += vc4_state->crtc_y; + vc4_state->src_h += vc4_state->crtc_y; vc4_state->crtc_y = 0; } @@ -244,8 +249,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | - VC4_SET_FIELD(vc4_state->crtc_w, SCALER_POS2_WIDTH) | - VC4_SET_FIELD(vc4_state->crtc_h, SCALER_POS2_HEIGHT)); + VC4_SET_FIELD(vc4_state->src_w, SCALER_POS2_WIDTH) | + VC4_SET_FIELD(vc4_state->src_h, SCALER_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); From 21af94cf1a4c2d3450ab7fead58e6e2291ab92a9 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 20 Oct 2015 16:06:57 +0100 Subject: [PATCH 08/10] drm/vc4: Add support for scaling of display planes. 
This implements a simple policy for choosing scaling modes (trapezoidal for decimation, PPF for magnification), and a single PPF filter (Mitchell/Netravali's recommendation). Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_drv.h | 4 + drivers/gpu/drm/vc4/vc4_hvs.c | 84 +++++++++++ drivers/gpu/drm/vc4/vc4_plane.c | 255 ++++++++++++++++++++++++++++++-- drivers/gpu/drm/vc4/vc4_regs.h | 46 ++++++ 4 files changed, 375 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index ae9802486080..3d1df6b1c4d3 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -155,7 +155,11 @@ struct vc4_hvs { * list. Units are dwords. */ struct drm_mm dlist_mm; + /* Memory manager for the LBM memory used by HVS scaling. */ + struct drm_mm lbm_mm; spinlock_t mm_lock; + + struct drm_mm_node mitchell_netravali_filter; }; struct vc4_plane { diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 9e435545b3b6..6fbab1c82cb1 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -100,12 +100,76 @@ int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused) } #endif +/* The filter kernel is composed of dwords each containing 3 9-bit + * signed integers packed next to each other. + */ +#define VC4_INT_TO_COEFF(coeff) (coeff & 0x1ff) +#define VC4_PPF_FILTER_WORD(c0, c1, c2) \ + ((((c0) & 0x1ff) << 0) | \ + (((c1) & 0x1ff) << 9) | \ + (((c2) & 0x1ff) << 18)) + +/* The whole filter kernel is arranged as the coefficients 0-16 going + * up, then a pad, then 17-31 going down and reversed within the + * dwords. This means that a linear phase kernel (where it's + * symmetrical at the boundary between 15 and 16) has the last 5 + * dwords matching the first 5, but reversed. 
+ */ +#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8, \ + c9, c10, c11, c12, c13, c14, c15) \ + {VC4_PPF_FILTER_WORD(c0, c1, c2), \ + VC4_PPF_FILTER_WORD(c3, c4, c5), \ + VC4_PPF_FILTER_WORD(c6, c7, c8), \ + VC4_PPF_FILTER_WORD(c9, c10, c11), \ + VC4_PPF_FILTER_WORD(c12, c13, c14), \ + VC4_PPF_FILTER_WORD(c15, c15, 0)} + +#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6 +#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1) + +/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali. + * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf + */ +static const u32 mitchell_netravali_1_3_1_3_kernel[] = + VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18, + 50, 82, 119, 155, 187, 213, 227); + +static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs, + struct drm_mm_node *space, + const u32 *kernel) +{ + int ret, i; + u32 __iomem *dst_kernel; + + ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS, 1, + 0); + if (ret) { + DRM_ERROR("Failed to allocate space for filter kernel: %d\n", + ret); + return ret; + } + + dst_kernel = hvs->dlist + space->start; + + for (i = 0; i < VC4_KERNEL_DWORDS; i++) { + if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS) + writel(kernel[i], &dst_kernel[i]); + else { + writel(kernel[VC4_KERNEL_DWORDS - i - 1], + &dst_kernel[i]); + } + } + + return 0; +} + static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = drm->dev_private; struct vc4_hvs *hvs = NULL; + int ret; hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL); if (!hvs) @@ -130,6 +194,22 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) HVS_BOOTLOADER_DLIST_END, (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END); + /* Set up the HVS LBM memory manager. 
We could have some more + * complicated data structure that allowed reuse of LBM areas + * between planes when they don't overlap on the screen, but + * for now we just allocate globally. + */ + drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024); + + /* Upload filter kernels. We only have the one for now, so we + * keep it around for the lifetime of the driver. + */ + ret = vc4_hvs_upload_linear_kernel(hvs, + &hvs->mitchell_netravali_filter, + mitchell_netravali_1_3_1_3_kernel); + if (ret) + return ret; + vc4->hvs = hvs; return 0; } @@ -140,7 +220,11 @@ static void vc4_hvs_unbind(struct device *dev, struct device *master, struct drm_device *drm = dev_get_drvdata(master); struct vc4_dev *vc4 = drm->dev_private; + if (vc4->hvs->mitchell_netravali_filter.allocated) + drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter); + drm_mm_takedown(&vc4->hvs->dlist_mm); + drm_mm_takedown(&vc4->hvs->lbm_mm); vc4->hvs = NULL; } diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index d9c929096164..7c2d697e8715 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -24,6 +24,12 @@ #include "drm_fb_cma_helper.h" #include "drm_plane_helper.h" +enum vc4_scaling_mode { + VC4_SCALING_NONE, + VC4_SCALING_TPZ, + VC4_SCALING_PPF, +}; + struct vc4_plane_state { struct drm_plane_state base; /* System memory copy of the display list for this element, computed @@ -47,13 +53,19 @@ struct vc4_plane_state { /* Clipped coordinates of the plane on the display. */ int crtc_x, crtc_y, crtc_w, crtc_h; - /* Clipped size of the area scanned from in the FB. */ - u32 src_w, src_h; + /* Clipped area being scanned from in the FB. */ + u32 src_x, src_y, src_w, src_h; + + enum vc4_scaling_mode x_scaling, y_scaling; + bool is_unity; /* Offset to start scanning out from the start of the plane's * BO. */ u32 offset; + + /* Our allocation in LBM for temporary storage during scaling. 
*/ + struct drm_mm_node lbm; }; static inline struct vc4_plane_state * @@ -90,6 +102,16 @@ static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) return NULL; } +static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) +{ + if (dst > src) + return VC4_SCALING_PPF; + else if (dst < src) + return VC4_SCALING_TPZ; + else + return VC4_SCALING_NONE; +} + static bool plane_enabled(struct drm_plane_state *state) { return state->fb && state->crtc; @@ -106,6 +128,8 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane if (!vc4_state) return NULL; + memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); + __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); if (vc4_state->dlist) { @@ -125,8 +149,17 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane static void vc4_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + if (vc4_state->lbm.allocated) { + unsigned long irqflags; + + spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); + drm_mm_remove_node(&vc4_state->lbm); + spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); + } + kfree(vc4_state->dlist); __drm_atomic_helper_plane_destroy_state(plane, &vc4_state->base); kfree(state); @@ -165,23 +198,60 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) vc4_state->dlist[vc4_state->dlist_count++] = val; } -static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) +/* Returns the scl0/scl1 field based on whether the dimensions need to + * be up/down/non-scaled. + * + * This is a replication of a table from the spec. 
+ */ +static u32 vc4_get_scl_field(struct drm_plane_state *state) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + + switch (vc4_state->x_scaling << 2 | vc4_state->y_scaling) { + case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: + return SCALER_CTL0_SCL_H_PPF_V_PPF; + case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: + return SCALER_CTL0_SCL_H_TPZ_V_PPF; + case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ: + return SCALER_CTL0_SCL_H_PPF_V_TPZ; + case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ: + return SCALER_CTL0_SCL_H_TPZ_V_TPZ; + case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE: + return SCALER_CTL0_SCL_H_PPF_V_NONE; + case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF: + return SCALER_CTL0_SCL_H_NONE_V_PPF; + case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ: + return SCALER_CTL0_SCL_H_NONE_V_TPZ; + case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE: + return SCALER_CTL0_SCL_H_TPZ_V_NONE; + default: + case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE: + /* The unity case is independently handled by + * SCALER_CTL0_UNITY. + */ + return 0; + } +} + +static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) +{ + struct drm_plane *plane = state->plane; + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; + u32 subpixel_src_mask = (1 << 16) - 1; vc4_state->offset = fb->offsets[0]; - if (state->crtc_w << 16 != state->src_w || - state->crtc_h << 16 != state->src_h) { - /* We don't support scaling yet, which involves - * allocating the LBM memory for scaling temporary - * storage, and putting filter kernels in the HVS - * context. - */ + /* We don't support subpixel source positioning for scaling. 
*/ + if ((state->src_x & subpixel_src_mask) || + (state->src_y & subpixel_src_mask) || + (state->src_w & subpixel_src_mask) || + (state->src_h & subpixel_src_mask)) { return -EINVAL; } + vc4_state->src_x = state->src_x >> 16; + vc4_state->src_y = state->src_y >> 16; vc4_state->src_w = state->src_w >> 16; vc4_state->src_h = state->src_h >> 16; @@ -190,6 +260,23 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_state->crtc_w = state->crtc_w; vc4_state->crtc_h = state->crtc_h; + vc4_state->x_scaling = vc4_get_scaling_mode(vc4_state->src_w, + vc4_state->crtc_w); + vc4_state->y_scaling = vc4_get_scaling_mode(vc4_state->src_h, + vc4_state->crtc_h); + vc4_state->is_unity = (vc4_state->x_scaling == VC4_SCALING_NONE && + vc4_state->y_scaling == VC4_SCALING_NONE); + + /* No configuring scaling on the cursor plane, since it gets + non-vblank-synced updates, and scaling requires + LBM changes which have to be vblank-synced. + */ + if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity) + return -EINVAL; + + /* Clamp the on-screen start x/y to 0. The hardware doesn't + * support negative y, and negative x wastes bandwidth. + */ if (vc4_state->crtc_x < 0) { vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format, 0) * @@ -207,6 +294,87 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) return 0; } +static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) +{ + u32 scale, recip; + + scale = (1 << 16) * src / dst; + + /* The specs note that while the reciprocal would be defined + * as (1<<32)/scale, ~0 is close enough.
+ */ + recip = ~0 / scale; + + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) | + VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE)); + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP)); +} + +static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) +{ + u32 scale = (1 << 16) * src / dst; + + vc4_dlist_write(vc4_state, + SCALER_PPF_AGC | + VC4_SET_FIELD(scale, SCALER_PPF_SCALE) | + VC4_SET_FIELD(0, SCALER_PPF_IPHASE)); +} + +static u32 vc4_lbm_size(struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + /* This is the worst case number. One of the two sizes will + * be used depending on the scaling configuration. + */ + u32 pix_per_line = max(vc4_state->src_w, (u32)vc4_state->crtc_w); + u32 lbm; + + if (vc4_state->is_unity) + return 0; + else if (vc4_state->y_scaling == VC4_SCALING_TPZ) + lbm = pix_per_line * 8; + else { + /* In special cases, this multiplier might be 12. */ + lbm = pix_per_line * 16; + } + + lbm = roundup(lbm, 32); + + return lbm; +} + +static void vc4_write_scaling_parameters(struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + + /* Ch0 H-PPF Word 0: Scaling Parameters */ + if (vc4_state->x_scaling == VC4_SCALING_PPF) { + vc4_write_ppf(vc4_state, + vc4_state->src_w, vc4_state->crtc_w); + } + + /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ + if (vc4_state->y_scaling == VC4_SCALING_PPF) { + vc4_write_ppf(vc4_state, + vc4_state->src_h, vc4_state->crtc_h); + vc4_dlist_write(vc4_state, 0xc0c0c0c0); + } + + /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ + if (vc4_state->x_scaling == VC4_SCALING_TPZ) { + vc4_write_tpz(vc4_state, + vc4_state->src_w, vc4_state->crtc_w); + } + + /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ + if (vc4_state->y_scaling == VC4_SCALING_TPZ) { + vc4_write_tpz(vc4_state, + vc4_state->src_h, vc4_state->crtc_h); + vc4_dlist_write(vc4_state, 0xc0c0c0c0); + } 
+} /* Writes out a full display list for an active plane to the plane's * private dlist state. @@ -214,22 +382,50 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) static int vc4_plane_mode_set(struct drm_plane *plane, struct drm_plane_state *state) { + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); u32 ctl0_offset = vc4_state->dlist_count; const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format); + u32 scl; + u32 lbm_size; + unsigned long irqflags; int ret; ret = vc4_plane_setup_clipping_and_scaling(state); if (ret) return ret; + /* Allocate the LBM memory that the HVS will use for temporary + * storage due to our scaling/format conversion. + */ + lbm_size = vc4_lbm_size(state); + if (lbm_size) { + if (!vc4_state->lbm.allocated) { + spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); + ret = drm_mm_insert_node(&vc4->hvs->lbm_mm, + &vc4_state->lbm, + lbm_size, 32, 0); + spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); + } else { + WARN_ON_ONCE(lbm_size != vc4_state->lbm.size); + } + } + + if (ret) + return ret; + + scl = vc4_get_scl_field(state); + + /* Control word */ vc4_dlist_write(vc4_state, SCALER_CTL0_VALID | (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | - SCALER_CTL0_UNITY); + (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | + VC4_SET_FIELD(scl, SCALER_CTL0_SCL0) | + VC4_SET_FIELD(scl, SCALER_CTL0_SCL1)); /* Position Word 0: Image Positions and Alpha Value */ vc4_state->pos0_offset = vc4_state->dlist_count; @@ -238,9 +434,14 @@ static int vc4_plane_mode_set(struct drm_plane *plane, VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); - /* Position Word 1: Scaled Image Dimensions. - * Skipped due to SCALER_CTL0_UNITY scaling. 
- */ + /* Position Word 1: Scaled Image Dimensions. */ + if (!vc4_state->is_unity) { + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(vc4_state->crtc_w, + SCALER_POS1_SCL_WIDTH) | + VC4_SET_FIELD(vc4_state->crtc_h, + SCALER_POS1_SCL_HEIGHT)); + } /* Position Word 2: Source Image Size, Alpha Mode */ vc4_state->pos2_offset = vc4_state->dlist_count; @@ -266,6 +467,32 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_dlist_write(vc4_state, VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH)); + if (!vc4_state->is_unity) { + /* LBM Base Address. */ + if (vc4_state->y_scaling != VC4_SCALING_NONE) + vc4_dlist_write(vc4_state, vc4_state->lbm.start); + + vc4_write_scaling_parameters(state); + + /* If any PPF setup was done, then all the kernel + * pointers get uploaded. + */ + if (vc4_state->x_scaling == VC4_SCALING_PPF || + vc4_state->y_scaling == VC4_SCALING_PPF) { + u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, + SCALER_PPF_KERNEL_OFFSET); + + /* HPPF plane 0 */ + vc4_dlist_write(vc4_state, kernel); + /* VPPF plane 0 */ + vc4_dlist_write(vc4_state, kernel); + /* HPPF plane 1 */ + vc4_dlist_write(vc4_state, kernel); + /* VPPF plane 1 */ + vc4_dlist_write(vc4_state, kernel); + } + } + vc4_state->dlist[ctl0_offset] |= VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 4e52a0a88551..037c7fe67187 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -536,6 +536,21 @@ enum hvs_pixel_format { #define SCALER_CTL0_ORDER_MASK VC4_MASK(14, 13) #define SCALER_CTL0_ORDER_SHIFT 13 +#define SCALER_CTL0_SCL1_MASK VC4_MASK(10, 8) +#define SCALER_CTL0_SCL1_SHIFT 8 + +#define SCALER_CTL0_SCL0_MASK VC4_MASK(7, 5) +#define SCALER_CTL0_SCL0_SHIFT 5 + +#define SCALER_CTL0_SCL_H_PPF_V_PPF 0 +#define SCALER_CTL0_SCL_H_TPZ_V_PPF 1 +#define SCALER_CTL0_SCL_H_PPF_V_TPZ 2 +#define SCALER_CTL0_SCL_H_TPZ_V_TPZ 3 +#define SCALER_CTL0_SCL_H_PPF_V_NONE 4 
+#define SCALER_CTL0_SCL_H_NONE_V_PPF 5 +#define SCALER_CTL0_SCL_H_NONE_V_TPZ 6 +#define SCALER_CTL0_SCL_H_TPZ_V_NONE 7 + /* Set to indicate no scaling. */ #define SCALER_CTL0_UNITY BIT(4) @@ -551,6 +566,12 @@ enum hvs_pixel_format { #define SCALER_POS0_START_X_MASK VC4_MASK(11, 0) #define SCALER_POS0_START_X_SHIFT 0 +#define SCALER_POS1_SCL_HEIGHT_MASK VC4_MASK(27, 16) +#define SCALER_POS1_SCL_HEIGHT_SHIFT 16 + +#define SCALER_POS1_SCL_WIDTH_MASK VC4_MASK(11, 0) +#define SCALER_POS1_SCL_WIDTH_SHIFT 0 + #define SCALER_POS2_ALPHA_MODE_MASK VC4_MASK(31, 30) #define SCALER_POS2_ALPHA_MODE_SHIFT 30 #define SCALER_POS2_ALPHA_MODE_PIPELINE 0 @@ -564,6 +585,31 @@ enum hvs_pixel_format { #define SCALER_POS2_WIDTH_MASK VC4_MASK(11, 0) #define SCALER_POS2_WIDTH_SHIFT 0 +#define SCALER_TPZ0_VERT_RECALC BIT(31) +#define SCALER_TPZ0_SCALE_MASK VC4_MASK(28, 8) +#define SCALER_TPZ0_SCALE_SHIFT 8 +#define SCALER_TPZ0_IPHASE_MASK VC4_MASK(7, 0) +#define SCALER_TPZ0_IPHASE_SHIFT 0 +#define SCALER_TPZ1_RECIP_MASK VC4_MASK(15, 0) +#define SCALER_TPZ1_RECIP_SHIFT 0 + +/* Skips interpolating coefficients to 64 phases, so just 8 are used. + * Required for nearest neighbor. + */ +#define SCALER_PPF_NOINTERP BIT(31) +/* Replaces the highest valued coefficient with one that makes all 4 + * sum to unity. + */ +#define SCALER_PPF_AGC BIT(30) +#define SCALER_PPF_SCALE_MASK VC4_MASK(24, 8) +#define SCALER_PPF_SCALE_SHIFT 8 +#define SCALER_PPF_IPHASE_MASK VC4_MASK(6, 0) +#define SCALER_PPF_IPHASE_SHIFT 0 + +#define SCALER_PPF_KERNEL_OFFSET_MASK VC4_MASK(13, 0) +#define SCALER_PPF_KERNEL_OFFSET_SHIFT 0 +#define SCALER_PPF_KERNEL_UNCACHED BIT(31) + #define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0) #define SCALER_SRC_PITCH_SHIFT 0 From fe4cd8476928a66e109ab50a430362fcee8a5716 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 20 Oct 2015 13:59:15 +0100 Subject: [PATCH 09/10] drm/vc4: Add support for a few more RGB display plane formats. These were all touch-tested with modetest.
Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 7c2d697e8715..013ebff60fb5 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -88,6 +88,22 @@ static const struct hvs_format { .drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, }, + { + .drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565, + .pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false, + }, + { + .drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565, + .pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false, + }, + { + .drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, + .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true, + }, + { + .drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, + .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false, + }, }; static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) From fc04023fafecf19ebd09278d8d67dc5ed1f68b46 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 30 Dec 2015 12:25:44 -0800 Subject: [PATCH 10/10] drm/vc4: Add support for YUV planes. This supports 420 and 422 subsampling with 2 or 3 planes, tested with modetest. It doesn't set up chroma subsampling position (which it appears KMS doesn't deal with yet). The LBM memory is overallocated in many cases, but apparently the docs aren't quite correct and I'll probably need to look at the hardware source to really figure it out. 
Signed-off-by: Eric Anholt --- drivers/gpu/drm/vc4/vc4_plane.c | 256 ++++++++++++++++++++++++-------- drivers/gpu/drm/vc4/vc4_regs.h | 56 ++++++- 2 files changed, 253 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 013ebff60fb5..7b0c72ae02a0 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -54,15 +54,19 @@ struct vc4_plane_state { /* Clipped coordinates of the plane on the display. */ int crtc_x, crtc_y, crtc_w, crtc_h; /* Clipped area being scanned from in the FB. */ - u32 src_x, src_y, src_w, src_h; + u32 src_x, src_y; - enum vc4_scaling_mode x_scaling, y_scaling; + u32 src_w[2], src_h[2]; + + /* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */ + enum vc4_scaling_mode x_scaling[2], y_scaling[2]; bool is_unity; + bool is_yuv; /* Offset to start scanning out from the start of the plane's * BO. */ - u32 offset; + u32 offsets[3]; /* Our allocation in LBM for temporary storage during scaling. 
*/ struct drm_mm_node lbm; @@ -79,6 +83,7 @@ static const struct hvs_format { u32 hvs; /* HVS_FORMAT_* */ u32 pixel_order; bool has_alpha; + bool flip_cbcr; } hvs_formats[] = { { .drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888, @@ -104,6 +109,32 @@ static const struct hvs_format { .drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551, .pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false, }, + { + .drm = DRM_FORMAT_YUV422, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, + }, + { + .drm = DRM_FORMAT_YVU422, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, + .flip_cbcr = true, + }, + { + .drm = DRM_FORMAT_YUV420, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, + }, + { + .drm = DRM_FORMAT_YVU420, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, + .flip_cbcr = true, + }, + { + .drm = DRM_FORMAT_NV12, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, + }, + { + .drm = DRM_FORMAT_NV16, + .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, + }, }; static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) @@ -219,11 +250,11 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) * * This is a replication of a table from the spec. 
*/ -static u32 vc4_get_scl_field(struct drm_plane_state *state) +static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); - switch (vc4_state->x_scaling << 2 | vc4_state->y_scaling) { + switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) { case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: return SCALER_CTL0_SCL_H_PPF_V_PPF; case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: @@ -254,9 +285,16 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) struct drm_plane *plane = state->plane; struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; + struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); u32 subpixel_src_mask = (1 << 16) - 1; + u32 format = fb->pixel_format; + int num_planes = drm_format_num_planes(format); + u32 h_subsample = 1; + u32 v_subsample = 1; + int i; - vc4_state->offset = fb->offsets[0]; + for (i = 0; i < num_planes; i++) + vc4_state->offsets[i] = bo->paddr + fb->offsets[i]; /* We don't support subpixel source positioning for scaling. 
*/ if ((state->src_x & subpixel_src_mask) || @@ -268,20 +306,48 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) vc4_state->src_x = state->src_x >> 16; vc4_state->src_y = state->src_y >> 16; - vc4_state->src_w = state->src_w >> 16; - vc4_state->src_h = state->src_h >> 16; + vc4_state->src_w[0] = state->src_w >> 16; + vc4_state->src_h[0] = state->src_h >> 16; vc4_state->crtc_x = state->crtc_x; vc4_state->crtc_y = state->crtc_y; vc4_state->crtc_w = state->crtc_w; vc4_state->crtc_h = state->crtc_h; - vc4_state->x_scaling = vc4_get_scaling_mode(vc4_state->src_w, - vc4_state->crtc_w); - vc4_state->y_scaling = vc4_get_scaling_mode(vc4_state->src_h, - vc4_state->crtc_h); - vc4_state->is_unity = (vc4_state->x_scaling == VC4_SCALING_NONE && - vc4_state->y_scaling == VC4_SCALING_NONE); + vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0], + vc4_state->crtc_w); + vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0], + vc4_state->crtc_h); + + if (num_planes > 1) { + vc4_state->is_yuv = true; + + h_subsample = drm_format_horz_chroma_subsampling(format); + v_subsample = drm_format_vert_chroma_subsampling(format); + vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample; + vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample; + + vc4_state->x_scaling[1] = + vc4_get_scaling_mode(vc4_state->src_w[1], + vc4_state->crtc_w); + vc4_state->y_scaling[1] = + vc4_get_scaling_mode(vc4_state->src_h[1], + vc4_state->crtc_h); + + /* YUV conversion requires that scaling be enabled, + * even on a plane that's otherwise 1:1. Choose TPZ + * for simplicity. 
+ */ + if (vc4_state->x_scaling[0] == VC4_SCALING_NONE) + vc4_state->x_scaling[0] = VC4_SCALING_TPZ; + if (vc4_state->y_scaling[0] == VC4_SCALING_NONE) + vc4_state->y_scaling[0] = VC4_SCALING_TPZ; + } + + vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && + vc4_state->y_scaling[0] == VC4_SCALING_NONE && + vc4_state->x_scaling[1] == VC4_SCALING_NONE && + vc4_state->y_scaling[1] == VC4_SCALING_NONE); /* No configuring scaling on the cursor plane, since it gets non-vblank-synced updates, and scaling requires requires @@ -294,16 +360,27 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) * support negative y, and negative x wastes bandwidth. */ if (vc4_state->crtc_x < 0) { - vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format, - 0) * - -vc4_state->crtc_x); - vc4_state->src_w += vc4_state->crtc_x; + for (i = 0; i < num_planes; i++) { + u32 cpp = drm_format_plane_cpp(fb->pixel_format, i); + u32 subs = ((i == 0) ? 1 : h_subsample); + + vc4_state->offsets[i] += (cpp * + (-vc4_state->crtc_x) / subs); + } + vc4_state->src_w[0] += vc4_state->crtc_x; + vc4_state->src_w[1] += vc4_state->crtc_x / h_subsample; vc4_state->crtc_x = 0; } if (vc4_state->crtc_y < 0) { - vc4_state->offset += fb->pitches[0] * -vc4_state->crtc_y; - vc4_state->src_h += vc4_state->crtc_y; + for (i = 0; i < num_planes; i++) { + u32 subs = ((i == 0) ? 1 : v_subsample); + + vc4_state->offsets[i] += (fb->pitches[i] * + (-vc4_state->crtc_y) / subs); + } + vc4_state->src_h[0] += vc4_state->crtc_y; + vc4_state->src_h[1] += vc4_state->crtc_y / v_subsample; vc4_state->crtc_y = 0; } @@ -344,15 +421,23 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) /* This is the worst case number. One of the two sizes will * be used depending on the scaling configuration. 
*/ - u32 pix_per_line = max(vc4_state->src_w, (u32)vc4_state->crtc_w); + u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w); u32 lbm; - if (vc4_state->is_unity) - return 0; - else if (vc4_state->y_scaling == VC4_SCALING_TPZ) - lbm = pix_per_line * 8; - else { - /* In special cases, this multiplier might be 12. */ + if (!vc4_state->is_yuv) { + if (vc4_state->is_unity) + return 0; + else if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ) + lbm = pix_per_line * 8; + else { + /* In special cases, this multiplier might be 12. */ + lbm = pix_per_line * 16; + } + } else { + /* There are cases for this going down to a multiplier + * of 2, but according to the firmware source, the + * table in the docs is somewhat wrong. + */ lbm = pix_per_line * 16; } @@ -361,33 +446,34 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) return lbm; } -static void vc4_write_scaling_parameters(struct drm_plane_state *state) +static void vc4_write_scaling_parameters(struct drm_plane_state *state, + int channel) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); /* Ch0 H-PPF Word 0: Scaling Parameters */ - if (vc4_state->x_scaling == VC4_SCALING_PPF) { + if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { vc4_write_ppf(vc4_state, - vc4_state->src_w, vc4_state->crtc_w); + vc4_state->src_w[channel], vc4_state->crtc_w); } /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ - if (vc4_state->y_scaling == VC4_SCALING_PPF) { + if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) { vc4_write_ppf(vc4_state, - vc4_state->src_h, vc4_state->crtc_h); + vc4_state->src_h[channel], vc4_state->crtc_h); vc4_dlist_write(vc4_state, 0xc0c0c0c0); } /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ - if (vc4_state->x_scaling == VC4_SCALING_TPZ) { + if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) { vc4_write_tpz(vc4_state, - vc4_state->src_w, vc4_state->crtc_w); + vc4_state->src_w[channel], vc4_state->crtc_w); } /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, 
Context */ - if (vc4_state->y_scaling == VC4_SCALING_TPZ) { + if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { vc4_write_tpz(vc4_state, - vc4_state->src_h, vc4_state->crtc_h); + vc4_state->src_h[channel], vc4_state->crtc_h); vc4_dlist_write(vc4_state, 0xc0c0c0c0); } } @@ -401,13 +487,13 @@ static int vc4_plane_mode_set(struct drm_plane *plane, struct vc4_dev *vc4 = to_vc4_dev(plane->dev); struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; - struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); u32 ctl0_offset = vc4_state->dlist_count; const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format); - u32 scl; + int num_planes = drm_format_num_planes(format->drm); + u32 scl0, scl1; u32 lbm_size; unsigned long irqflags; - int ret; + int ret, i; ret = vc4_plane_setup_clipping_and_scaling(state); if (ret) @@ -432,7 +518,19 @@ static int vc4_plane_mode_set(struct drm_plane *plane, if (ret) return ret; - scl = vc4_get_scl_field(state); + /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB + * and 4:4:4, scl1 should be set to scl0 so both channels of + * the scaler do the same thing. For YUV, the Y plane needs + * to be put in channel 1 and Cb/Cr in channel 0, so we swap + * the scl fields here. + */ + if (num_planes == 1) { + scl0 = vc4_get_scl_field(state, 1); + scl1 = scl0; + } else { + scl0 = vc4_get_scl_field(state, 1); + scl1 = vc4_get_scl_field(state, 0); + } /* Control word */ vc4_dlist_write(vc4_state, @@ -440,8 +538,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | (vc4_state->is_unity ? 
SCALER_CTL0_UNITY : 0) | - VC4_SET_FIELD(scl, SCALER_CTL0_SCL0) | - VC4_SET_FIELD(scl, SCALER_CTL0_SCL1)); + VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | + VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); /* Position Word 0: Image Positions and Alpha Value */ vc4_state->pos0_offset = vc4_state->dlist_count; @@ -466,35 +564,68 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | - VC4_SET_FIELD(vc4_state->src_w, SCALER_POS2_WIDTH) | - VC4_SET_FIELD(vc4_state->src_h, SCALER_POS2_HEIGHT)); + VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) | + VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); - /* Pointer Word 0: RGB / Y Pointer */ + + /* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers + * + * The pointers may be any byte address. + */ vc4_state->ptr0_offset = vc4_state->dlist_count; - vc4_dlist_write(vc4_state, bo->paddr + vc4_state->offset); + if (!format->flip_cbcr) { + for (i = 0; i < num_planes; i++) + vc4_dlist_write(vc4_state, vc4_state->offsets[i]); + } else { + WARN_ON_ONCE(num_planes != 3); + vc4_dlist_write(vc4_state, vc4_state->offsets[0]); + vc4_dlist_write(vc4_state, vc4_state->offsets[2]); + vc4_dlist_write(vc4_state, vc4_state->offsets[1]); + } - /* Pointer Context Word 0: Written by the HVS */ - vc4_dlist_write(vc4_state, 0xc0c0c0c0); + /* Pointer Context Word 0/1/2: Written by the HVS */ + for (i = 0; i < num_planes; i++) + vc4_dlist_write(vc4_state, 0xc0c0c0c0); - /* Pitch word 0: Pointer 0 Pitch */ - vc4_dlist_write(vc4_state, - VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH)); + /* Pitch word 0/1/2 */ + for (i = 0; i < num_planes; i++) { + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH)); + } + + /* Colorspace conversion words */ + if (vc4_state->is_yuv) { + vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5); + vc4_dlist_write(vc4_state, 
SCALER_CSC1_ITR_R_601_5); + vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5); + } if (!vc4_state->is_unity) { /* LBM Base Address. */ - if (vc4_state->y_scaling != VC4_SCALING_NONE) + if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || + vc4_state->y_scaling[1] != VC4_SCALING_NONE) { vc4_dlist_write(vc4_state, vc4_state->lbm.start); + } - vc4_write_scaling_parameters(state); + if (num_planes > 1) { + /* Emit Cb/Cr as channel 0 and Y as channel + * 1. This matches how we set up scl0/scl1 + * above. + */ + vc4_write_scaling_parameters(state, 1); + } + vc4_write_scaling_parameters(state, 0); /* If any PPF setup was done, then all the kernel * pointers get uploaded. */ - if (vc4_state->x_scaling == VC4_SCALING_PPF || - vc4_state->y_scaling == VC4_SCALING_PPF) { + if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || + vc4_state->y_scaling[0] == VC4_SCALING_PPF || + vc4_state->x_scaling[1] == VC4_SCALING_PPF || + vc4_state->y_scaling[1] == VC4_SCALING_PPF) { u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, SCALER_PPF_KERNEL_OFFSET); @@ -698,6 +829,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, struct drm_plane *plane = NULL; struct vc4_plane *vc4_plane; u32 formats[ARRAY_SIZE(hvs_formats)]; + u32 num_formats = 0; int ret = 0; unsigned i; @@ -708,12 +840,20 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, goto fail; } - for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) - formats[i] = hvs_formats[i].drm; + for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { + /* Don't allow YUV in cursor planes, since that means + * turning on the scaler, which we don't allow for the + * cursor.
+ */ + if (type != DRM_PLANE_TYPE_CURSOR || + hvs_formats[i].hvs < HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE) { + formats[num_formats++] = hvs_formats[i].drm; + } + } plane = &vc4_plane->base; ret = drm_universal_plane_init(dev, plane, 0xff, &vc4_plane_funcs, - formats, ARRAY_SIZE(formats), + formats, num_formats, type, NULL); drm_plane_helper_add(plane, &vc4_plane_helper_funcs); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 037c7fe67187..25df20ef939c 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -503,7 +503,12 @@ enum hvs_pixel_format { HVS_PIXEL_FORMAT_RGB888 = 5, HVS_PIXEL_FORMAT_RGBA6666 = 6, /* 32bpp */ - HVS_PIXEL_FORMAT_RGBA8888 = 7 + HVS_PIXEL_FORMAT_RGBA8888 = 7, + + HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE = 8, + HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9, + HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10, + HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11, }; /* Note: the LSB is the rightmost character shown. Only valid for @@ -585,6 +590,55 @@ enum hvs_pixel_format { #define SCALER_POS2_WIDTH_MASK VC4_MASK(11, 0) #define SCALER_POS2_WIDTH_SHIFT 0 +/* Color Space Conversion words. Some values are S2.8 signed + * integers, except that the 2 integer bits map as {0x0: 0, 0x1: 1, + * 0x2: 2, 0x3: -1} + */ +/* bottom 8 bits of S2.8 contribution of Cr to Blue */ +#define SCALER_CSC0_COEF_CR_BLU_MASK VC4_MASK(31, 24) +#define SCALER_CSC0_COEF_CR_BLU_SHIFT 24 +/* Signed offset to apply to Y before CSC. (Y' = Y + YY_OFS) */ +#define SCALER_CSC0_COEF_YY_OFS_MASK VC4_MASK(23, 16) +#define SCALER_CSC0_COEF_YY_OFS_SHIFT 16 +/* Signed offset to apply to CB before CSC (Cb' = Cb - 128 + CB_OFS). */ +#define SCALER_CSC0_COEF_CB_OFS_MASK VC4_MASK(15, 8) +#define SCALER_CSC0_COEF_CB_OFS_SHIFT 8 +/* Signed offset to apply to CR before CSC (Cr' = Cr - 128 + CR_OFS).
*/ +#define SCALER_CSC0_COEF_CR_OFS_MASK VC4_MASK(7, 0) +#define SCALER_CSC0_COEF_CR_OFS_SHIFT 0 +#define SCALER_CSC0_ITR_R_601_5 0x00f00000 +#define SCALER_CSC0_ITR_R_709_3 0x00f00000 +#define SCALER_CSC0_JPEG_JFIF 0x00000000 + +/* S2.8 contribution of Cb to Green */ +#define SCALER_CSC1_COEF_CB_GRN_MASK VC4_MASK(31, 22) +#define SCALER_CSC1_COEF_CB_GRN_SHIFT 22 +/* S2.8 contribution of Cr to Green */ +#define SCALER_CSC1_COEF_CR_GRN_MASK VC4_MASK(21, 12) +#define SCALER_CSC1_COEF_CR_GRN_SHIFT 12 +/* S2.8 contribution of Y to all of RGB */ +#define SCALER_CSC1_COEF_YY_ALL_MASK VC4_MASK(11, 2) +#define SCALER_CSC1_COEF_YY_ALL_SHIFT 2 +/* top 2 bits of S2.8 contribution of Cr to Blue */ +#define SCALER_CSC1_COEF_CR_BLU_MASK VC4_MASK(1, 0) +#define SCALER_CSC1_COEF_CR_BLU_SHIFT 0 +#define SCALER_CSC1_ITR_R_601_5 0xe73304a8 +#define SCALER_CSC1_ITR_R_709_3 0xf2b784a8 +#define SCALER_CSC1_JPEG_JFIF 0xea34a400 + +/* S2.8 contribution of Cb to Red */ +#define SCALER_CSC2_COEF_CB_RED_MASK VC4_MASK(29, 20) +#define SCALER_CSC2_COEF_CB_RED_SHIFT 20 +/* S2.8 contribution of Cr to Red */ +#define SCALER_CSC2_COEF_CR_RED_MASK VC4_MASK(19, 10) +#define SCALER_CSC2_COEF_CR_RED_SHIFT 10 +/* S2.8 contribution of Cb to Blue */ +#define SCALER_CSC2_COEF_CB_BLU_MASK VC4_MASK(19, 10) +#define SCALER_CSC2_COEF_CB_BLU_SHIFT 10 +#define SCALER_CSC2_ITR_R_601_5 0x00066204 +#define SCALER_CSC2_ITR_R_709_3 0x00072a1c +#define SCALER_CSC2_JPEG_JFIF 0x000599c5 + #define SCALER_TPZ0_VERT_RECALC BIT(31) #define SCALER_TPZ0_SCALE_MASK VC4_MASK(28, 8) #define SCALER_TPZ0_SCALE_SHIFT 8