/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * DOC: Frame Buffer Compression (FBC)
 *
 * FBC tries to save memory bandwidth (and so power consumption) by
 * compressing the memory used by the display. It is totally transparent
 * to user space and completely handled in the kernel.
 *
 * The benefits of FBC are mostly visible with solid backgrounds and
 * variation-less patterns. They come from keeping the memory footprint
 * small and having fewer memory pages opened and accessed for refreshing
 * the display.
 *
 * i915 is responsible for reserving stolen memory for FBC and for
 * programming its offset in the proper registers. The hardware takes care
 * of all the compression/decompression. However, there are many known
 * cases where we have to forcibly disable FBC to allow proper screen
 * updates.
 */

#include <linux/string_helpers.h>
#include <drm/drm_fourcc.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "intel_cdclk.h"
#include "intel_de.h"
#include "intel_display_trace.h"
#include "intel_display_types.h"
#include "intel_fbc.h"
#include "intel_frontbuffer.h"

#define for_each_fbc_id(__dev_priv, __fbc_id) \
	for ((__fbc_id) = INTEL_FBC_A; (__fbc_id) < I915_MAX_FBCS; (__fbc_id)++) \
		for_each_if(INTEL_INFO(__dev_priv)->display.fbc_mask & BIT(__fbc_id))

#define for_each_intel_fbc(__dev_priv, __fbc, __fbc_id) \
	for_each_fbc_id((__dev_priv), (__fbc_id)) \
		for_each_if((__fbc) = (__dev_priv)->fbc[(__fbc_id)])
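
/*
 * Usage sketch: given a struct drm_i915_private *i915, the iterators above
 * walk every FBC instance on the device, skipping ids that are not set in
 * display.fbc_mask or that have no struct intel_fbc allocated:
 *
 *	struct intel_fbc *fbc;
 *	enum intel_fbc_id fbc_id;
 *
 *	for_each_intel_fbc(i915, fbc, fbc_id)
 *		fbc->busy_bits = 0;
 *
 * The loop body is only a placeholder for whatever per-instance operation
 * is needed.
 */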

struct intel_fbc_funcs {
	void (*activate)(struct intel_fbc *fbc);
	void (*deactivate)(struct intel_fbc *fbc);
	bool (*is_active)(struct intel_fbc *fbc);
	bool (*is_compressing)(struct intel_fbc *fbc);
	void (*nuke)(struct intel_fbc *fbc);
	void (*program_cfb)(struct intel_fbc *fbc);
	void (*set_false_color)(struct intel_fbc *fbc, bool enable);
};

struct intel_fbc_state {
	struct intel_plane *plane;
	unsigned int cfb_stride;
	unsigned int cfb_size;
	unsigned int fence_y_offset;
	u16 override_cfb_stride;
	u16 interval;
	s8 fence_id;
};

struct intel_fbc {
	struct drm_i915_private *i915;
	const struct intel_fbc_funcs *funcs;

	/*
	 * This is always the inner lock when overlapping with
	 * struct_mutex and it's the outer lock when overlapping
	 * with stolen_lock.
	 */
	struct mutex lock;
	unsigned int possible_framebuffer_bits;
	unsigned int busy_bits;
	struct drm_mm_node compressed_fb;
	struct drm_mm_node compressed_llb;
	enum intel_fbc_id id;
	u8 limit;
	bool false_color;
	bool active;
	bool activated;
	bool flip_pending;
	bool underrun_detected;
	struct work_struct underrun_work;

	/*
	 * This structure contains everything that's relevant to program the
	 * hardware registers. When we want to figure out if we need to disable
	 * and re-enable FBC for a new configuration we just check if there's
	 * something different in the struct. The genx_fbc_activate functions
	 * are supposed to read from it in order to program the registers.
	 */
	struct intel_fbc_state state;
	const char *no_fbc_reason;
};

/* plane stride in pixels */
static unsigned int intel_fbc_plane_stride(const struct intel_plane_state *plane_state)
{
	const struct drm_framebuffer *fb = plane_state->hw.fb;
	unsigned int stride;

	stride = plane_state->view.color_plane[0].mapping_stride;

	if (!drm_rotation_90_or_270(plane_state->hw.rotation))
		stride /= fb->format->cpp[0];

	return stride;
}
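
/*
 * Example: an unrotated XRGB8888 plane with a 7680 byte mapping stride
 * gives a plane stride of 7680 / 4 = 1920 pixels; with 90/270 rotation
 * the mapping stride is returned unchanged.
 */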
/* plane stride based cfb stride in bytes, assuming 1:1 compression limit */
static unsigned int _intel_fbc_cfb_stride(const struct intel_plane_state *plane_state)
{
	unsigned int cpp = 4; /* FBC always 4 bytes per pixel */

	return intel_fbc_plane_stride(plane_state) * cpp;
}
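
/*
 * The CFB always uses 4 bytes per pixel regardless of the framebuffer
 * format, so the 1920 pixel plane stride from the example above maps to
 * a 1920 * 4 = 7680 byte CFB stride (before any alignment is applied).
 */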
/* minimum acceptable cfb stride in bytes, assuming 1:1 compression limit */
static unsigned int skl_fbc_min_cfb_stride(const struct intel_plane_state *plane_state)
{
	struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
	unsigned int limit = 4; /* 1:4 compression limit is the worst case */
	unsigned int cpp = 4; /* FBC always 4 bytes per pixel */
	unsigned int width = drm_rect_width(&plane_state->uapi.src) >> 16;
	unsigned int height = 4; /* FBC segment is 4 lines */
	unsigned int stride;

	/* minimum segment stride we can use */
	stride = width * cpp * height / limit;

	/*
	 * Wa_16011863758: icl+
	 * Avoid some hardware segment address miscalculation.
	 */
	if (DISPLAY_VER(i915) >= 11)
		stride += 64;

	/*
	 * At least some of the platforms require each 4 line segment to
	 * be 512 byte aligned. Just do it always for simplicity.
	 */
	stride = ALIGN(stride, 512);

	/* convert back to single line equivalent with 1:1 compression limit */
	return stride * limit / height;
}
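
/*
 * Worked example for skl_fbc_min_cfb_stride(), assuming a 1920 pixel
 * wide plane:
 *
 *	segment stride: 1920 * 4 * 4 / 4        = 7680 bytes
 *	Wa_16011863758 (icl+): 7680 + 64        = 7744 bytes
 *	ALIGN(7744, 512)                        = 8192 bytes
 *	back to 1:1 line stride: 8192 * 4 / 4   = 8192 bytes
 *
 * On display version 9/10 the workaround is skipped, 7680 is already a
 * multiple of 512, and the minimum CFB stride stays at 7680 bytes.
 */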
/* properly aligned cfb stride in bytes, assuming 1:1 compression limit */
static unsigned int intel_fbc_cfb_stride(const struct intel_plane_state *plane_state)
{
	struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
	unsigned int stride = _intel_fbc_cfb_stride(plane_state);

	/*
	 * At least some of the platforms require each 4 line segment to
	 * be 512 byte aligned. Aligning each line to 512 bytes guarantees
	 * that regardless of the compression limit we choose later.
	 */
	if (DISPLAY_VER(i915) >= 9)
		return max(ALIGN(stride, 512), skl_fbc_min_cfb_stride(plane_state));
	else
		return stride;
}
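
/*
 * Continuing the 1920 pixel wide example: _intel_fbc_cfb_stride() gives
 * 7680 bytes, which is already 512 byte aligned. On icl+ the minimum from
 * skl_fbc_min_cfb_stride() is 8192 bytes, so that wins; on display
 * version 9/10 both values are 7680 bytes. Before display version 9 the
 * unaligned 7680 byte stride is used directly.
 */
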
static unsigned int intel_fbc_cfb_size(const struct intel_plane_state *plane_state)
{
	struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
	int lines = drm_rect_height(&plane_state->uapi.src) >> 16;

	if (DISPLAY_VER(i915) == 7)
		lines = min(lines, 2048);
	else if (DISPLAY_VER(i915) >= 8)
		lines = min(lines, 2560);

	return lines * intel_fbc_cfb_stride(plane_state);
}
static u16 intel_fbc_override_cfb_stride(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
unsigned int stride_aligned = intel_fbc_cfb_stride(plane_state);
unsigned int stride = _intel_fbc_cfb_stride(plane_state);
const struct drm_framebuffer *fb = plane_state->hw.fb;
/*
* Override stride in 64 byte units per 4 line segment.
*
* Gen9 hw miscalculates cfb stride for linear as
* PLANE_STRIDE*512 instead of PLANE_STRIDE*64, so
* we always need to use the override there.
*/
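/*
 * Worked example (illustrative numbers only): with a 5120 byte
 * aligned CFB stride the override value computed below is
 * 5120 * 4 / 64 = 320, i.e. 320 64 byte units per 4 line segment.
 */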
if (stride != stride_aligned ||
(DISPLAY_VER(i915) == 9 && fb->modifier == DRM_FORMAT_MOD_LINEAR))
return stride_aligned * 4 / 64;
return 0;
}
static u32 i8xx_fbc_ctl(struct intel_fbc *fbc)
{
const struct intel_fbc_state *fbc_state = &fbc->state;
struct drm_i915_private *i915 = fbc->i915;
unsigned int cfb_stride;
u32 fbc_ctl;
cfb_stride = fbc_state->cfb_stride / fbc->limit;
/* FBC_CTL wants 32B or 64B units */
if (DISPLAY_VER(i915) == 2)
cfb_stride = (cfb_stride / 32) - 1;
else
cfb_stride = (cfb_stride / 64) - 1;
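/*
 * e.g. (illustrative) a 2048 byte CFB stride at a 1:1 limit is
 * programmed as (2048 / 64) - 1 = 31 here on gen3+, or
 * (2048 / 32) - 1 = 63 on gen2.
 */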
fbc_ctl = FBC_CTL_PERIODIC |
FBC_CTL_INTERVAL(fbc_state->interval) |
FBC_CTL_STRIDE(cfb_stride);
if (IS_I945GM(i915))
fbc_ctl |= FBC_CTL_C3_IDLE; /* 945 needs special SR handling */
if (fbc_state->fence_id >= 0)
fbc_ctl |= FBC_CTL_FENCENO(fbc_state->fence_id);
return fbc_ctl;
}
static u32 i965_fbc_ctl2(struct intel_fbc *fbc)
{
const struct intel_fbc_state *fbc_state = &fbc->state;
u32 fbc_ctl2;
fbc_ctl2 = FBC_CTL_FENCE_DBL | FBC_CTL_IDLE_IMM |
FBC_CTL_PLANE(fbc_state->plane->i9xx_plane);
if (fbc_state->fence_id >= 0)
fbc_ctl2 |= FBC_CTL_CPU_FENCE_EN;
return fbc_ctl2;
}
static void i8xx_fbc_deactivate(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
u32 fbc_ctl;
/* Disable compression */
fbc_ctl = intel_de_read(i915, FBC_CONTROL);
if ((fbc_ctl & FBC_CTL_EN) == 0)
return;
fbc_ctl &= ~FBC_CTL_EN;
intel_de_write(i915, FBC_CONTROL, fbc_ctl);
/* Wait for compressing bit to clear */
if (intel_de_wait_for_clear(i915, FBC_STATUS,
FBC_STAT_COMPRESSING, 10)) {
drm_dbg_kms(&i915->drm, "FBC idle timed out\n");
return;
}
}
static void i8xx_fbc_activate(struct intel_fbc *fbc)
{
const struct intel_fbc_state *fbc_state = &fbc->state;
struct drm_i915_private *i915 = fbc->i915;
int i;
/* Clear old tags */
for (i = 0; i < (FBC_LL_SIZE / 32) + 1; i++)
intel_de_write(i915, FBC_TAG(i), 0);
if (DISPLAY_VER(i915) == 4) {
intel_de_write(i915, FBC_CONTROL2,
i965_fbc_ctl2(fbc));
intel_de_write(i915, FBC_FENCE_OFF,
fbc_state->fence_y_offset);
}
intel_de_write(i915, FBC_CONTROL,
FBC_CTL_EN | i8xx_fbc_ctl(fbc));
}
static bool i8xx_fbc_is_active(struct intel_fbc *fbc)
{
return intel_de_read(fbc->i915, FBC_CONTROL) & FBC_CTL_EN;
}
static bool i8xx_fbc_is_compressing(struct intel_fbc *fbc)
{
return intel_de_read(fbc->i915, FBC_STATUS) &
(FBC_STAT_COMPRESSING | FBC_STAT_COMPRESSED);
}
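/*
 * Nuke (recompress) the CFB by re-writing the plane's base address
 * register with its current value - the register write itself is the
 * trigger, the value is unchanged.
 */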
static void i8xx_fbc_nuke(struct intel_fbc *fbc)
{
struct intel_fbc_state *fbc_state = &fbc->state;
enum i9xx_plane_id i9xx_plane = fbc_state->plane->i9xx_plane;
struct drm_i915_private *dev_priv = fbc->i915;
spin_lock_irq(&dev_priv->uncore.lock);
intel_de_write_fw(dev_priv, DSPADDR(i9xx_plane),
intel_de_read_fw(dev_priv, DSPADDR(i9xx_plane)));
spin_unlock_irq(&dev_priv->uncore.lock);
}
static void i8xx_fbc_program_cfb(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
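/*
 * The pre-g4x CFB and LL base registers take absolute stolen memory
 * addresses, so the final address must fit in 32 bits.
 */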
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
fbc->compressed_fb.start, U32_MAX));
GEM_BUG_ON(range_overflows_end_t(u64, i915->dsm.start,
fbc->compressed_llb.start, U32_MAX));
intel_de_write(i915, FBC_CFB_BASE,
i915->dsm.start + fbc->compressed_fb.start);
intel_de_write(i915, FBC_LL_BASE,
i915->dsm.start + fbc->compressed_llb.start);
}
static const struct intel_fbc_funcs i8xx_fbc_funcs = {
.activate = i8xx_fbc_activate,
.deactivate = i8xx_fbc_deactivate,
.is_active = i8xx_fbc_is_active,
.is_compressing = i8xx_fbc_is_compressing,
.nuke = i8xx_fbc_nuke,
.program_cfb = i8xx_fbc_program_cfb,
};
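/*
 * Same trick as i8xx_fbc_nuke(), but gen4+ planes are re-armed via
 * DSPSURF rather than DSPADDR.
 */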
static void i965_fbc_nuke(struct intel_fbc *fbc)
{
struct intel_fbc_state *fbc_state = &fbc->state;
enum i9xx_plane_id i9xx_plane = fbc_state->plane->i9xx_plane;
struct drm_i915_private *dev_priv = fbc->i915;
spin_lock_irq(&dev_priv->uncore.lock);
intel_de_write_fw(dev_priv, DSPSURF(i9xx_plane),
intel_de_read_fw(dev_priv, DSPSURF(i9xx_plane)));
spin_unlock_irq(&dev_priv->uncore.lock);
}
static const struct intel_fbc_funcs i965_fbc_funcs = {
.activate = i8xx_fbc_activate,
.deactivate = i8xx_fbc_deactivate,
.is_active = i8xx_fbc_is_active,
.is_compressing = i8xx_fbc_is_compressing,
.nuke = i965_fbc_nuke,
.program_cfb = i8xx_fbc_program_cfb,
};
static u32 g4x_dpfc_ctl_limit(struct intel_fbc *fbc)
{
switch (fbc->limit) {
default:
MISSING_CASE(fbc->limit);
fallthrough;
case 1:
return DPFC_CTL_LIMIT_1X;
case 2:
return DPFC_CTL_LIMIT_2X;
case 4:
return DPFC_CTL_LIMIT_4X;
}
}
static u32 g4x_dpfc_ctl(struct intel_fbc *fbc)
{
const struct intel_fbc_state *fbc_state = &fbc->state;
struct drm_i915_private *i915 = fbc->i915;
u32 dpfc_ctl;
dpfc_ctl = g4x_dpfc_ctl_limit(fbc) |
DPFC_CTL_PLANE_G4X(fbc_state->plane->i9xx_plane);
if (IS_G4X(i915))
dpfc_ctl |= DPFC_CTL_SR_EN;
if (fbc_state->fence_id >= 0) {
dpfc_ctl |= DPFC_CTL_FENCE_EN_G4X;
if (DISPLAY_VER(i915) < 6)
dpfc_ctl |= DPFC_CTL_FENCENO(fbc_state->fence_id);
}
return dpfc_ctl;
}
static void g4x_fbc_activate(struct intel_fbc *fbc)
{
const struct intel_fbc_state *fbc_state = &fbc->state;
struct drm_i915_private *i915 = fbc->i915;
intel_de_write(i915, DPFC_FENCE_YOFF,
fbc_state->fence_y_offset);
intel_de_write(i915, DPFC_CONTROL,
DPFC_CTL_EN | g4x_dpfc_ctl(fbc));
}
static void g4x_fbc_deactivate(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
u32 dpfc_ctl;
/* Disable compression */
dpfc_ctl = intel_de_read(i915, DPFC_CONTROL);
if (dpfc_ctl & DPFC_CTL_EN) {
dpfc_ctl &= ~DPFC_CTL_EN;
intel_de_write(i915, DPFC_CONTROL, dpfc_ctl);
}
}
static bool g4x_fbc_is_active(struct intel_fbc *fbc)
{
return intel_de_read(fbc->i915, DPFC_CONTROL) & DPFC_CTL_EN;
}
static bool g4x_fbc_is_compressing(struct intel_fbc *fbc)
{
return intel_de_read(fbc->i915, DPFC_STATUS) & DPFC_COMP_SEG_MASK;
}
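/*
 * Unlike i8xx_fbc_program_cfb(), g4x+ programs the CFB base as an
 * offset into stolen memory rather than an absolute address.
 */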
static void g4x_fbc_program_cfb(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
intel_de_write(i915, DPFC_CB_BASE, fbc->compressed_fb.start);
}
static const struct intel_fbc_funcs g4x_fbc_funcs = {
.activate = g4x_fbc_activate,
.deactivate = g4x_fbc_deactivate,
.is_active = g4x_fbc_is_active,
.is_compressing = g4x_fbc_is_compressing,
.nuke = i965_fbc_nuke,
.program_cfb = g4x_fbc_program_cfb,
};
static void ilk_fbc_activate(struct intel_fbc *fbc)
{
struct intel_fbc_state *fbc_state = &fbc->state;
struct drm_i915_private *i915 = fbc->i915;
intel_de_write(i915, ILK_DPFC_FENCE_YOFF(fbc->id),
fbc_state->fence_y_offset);
intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id),
DPFC_CTL_EN | g4x_dpfc_ctl(fbc));
}
static void ilk_fbc_deactivate(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
u32 dpfc_ctl;
/* Disable compression */
dpfc_ctl = intel_de_read(i915, ILK_DPFC_CONTROL(fbc->id));
if (dpfc_ctl & DPFC_CTL_EN) {
dpfc_ctl &= ~DPFC_CTL_EN;
intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id), dpfc_ctl);
}
}
static bool ilk_fbc_is_active(struct intel_fbc *fbc)
{
return intel_de_read(fbc->i915, ILK_DPFC_CONTROL(fbc->id)) & DPFC_CTL_EN;
}
static bool ilk_fbc_is_compressing(struct intel_fbc *fbc)
{
return intel_de_read(fbc->i915, ILK_DPFC_STATUS(fbc->id)) & DPFC_COMP_SEG_MASK;
}
static void ilk_fbc_program_cfb(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
intel_de_write(i915, ILK_DPFC_CB_BASE(fbc->id), fbc->compressed_fb.start);
}
static const struct intel_fbc_funcs ilk_fbc_funcs = {
.activate = ilk_fbc_activate,
.deactivate = ilk_fbc_deactivate,
.is_active = ilk_fbc_is_active,
.is_compressing = ilk_fbc_is_compressing,
.nuke = i965_fbc_nuke,
.program_cfb = ilk_fbc_program_cfb,
};
static void snb_fbc_program_fence(struct intel_fbc *fbc)
{
const struct intel_fbc_state *fbc_state = &fbc->state;
struct drm_i915_private *i915 = fbc->i915;
u32 ctl = 0;
if (fbc_state->fence_id >= 0)
ctl = SNB_DPFC_FENCE_EN | SNB_DPFC_FENCENO(fbc_state->fence_id);
intel_de_write(i915, SNB_DPFC_CTL_SA, ctl);
intel_de_write(i915, SNB_DPFC_CPU_FENCE_OFFSET, fbc_state->fence_y_offset);
}
static void snb_fbc_activate(struct intel_fbc *fbc)
{
snb_fbc_program_fence(fbc);
ilk_fbc_activate(fbc);
}
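/*
 * SNB+ can request a nuke directly through the FBC render state
 * message register instead of poking the plane registers.
 */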
static void snb_fbc_nuke(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
intel_de_write(i915, MSG_FBC_REND_STATE(fbc->id), FBC_REND_NUKE);
intel_de_posting_read(i915, MSG_FBC_REND_STATE(fbc->id));
}
static const struct intel_fbc_funcs snb_fbc_funcs = {
.activate = snb_fbc_activate,
.deactivate = ilk_fbc_deactivate,
.is_active = ilk_fbc_is_active,
.is_compressing = ilk_fbc_is_compressing,
.nuke = snb_fbc_nuke,
.program_cfb = ilk_fbc_program_cfb,
};
static void glk_fbc_program_cfb_stride(struct intel_fbc *fbc)
{
const struct intel_fbc_state *fbc_state = &fbc->state;
struct drm_i915_private *i915 = fbc->i915;
u32 val = 0;
if (fbc_state->override_cfb_stride)
val |= FBC_STRIDE_OVERRIDE |
FBC_STRIDE(fbc_state->override_cfb_stride / fbc->limit);
intel_de_write(i915, GLK_FBC_STRIDE(fbc->id), val);
}
static void skl_fbc_program_cfb_stride(struct intel_fbc *fbc)
{
const struct intel_fbc_state *fbc_state = &fbc->state;
struct drm_i915_private *i915 = fbc->i915;
u32 val = 0;
/* Display WA #0529: skl, kbl, bxt. */
if (fbc_state->override_cfb_stride)
val |= CHICKEN_FBC_STRIDE_OVERRIDE |
CHICKEN_FBC_STRIDE(fbc_state->override_cfb_stride / fbc->limit);
intel_de_rmw(i915, CHICKEN_MISC_4,
CHICKEN_FBC_STRIDE_OVERRIDE |
CHICKEN_FBC_STRIDE_MASK, val);
}
static u32 ivb_dpfc_ctl(struct intel_fbc *fbc)
{
const struct intel_fbc_state *fbc_state = &fbc->state;
struct drm_i915_private *i915 = fbc->i915;
u32 dpfc_ctl;
dpfc_ctl = g4x_dpfc_ctl_limit(fbc);
if (IS_IVYBRIDGE(i915))
dpfc_ctl |= DPFC_CTL_PLANE_IVB(fbc_state->plane->i9xx_plane);
if (fbc_state->fence_id >= 0)
dpfc_ctl |= DPFC_CTL_FENCE_EN_IVB;
if (fbc->false_color)
dpfc_ctl |= DPFC_CTL_FALSE_COLOR;
return dpfc_ctl;
}
static void ivb_fbc_activate(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
if (DISPLAY_VER(i915) >= 10)
glk_fbc_program_cfb_stride(fbc);
else if (DISPLAY_VER(i915) == 9)
skl_fbc_program_cfb_stride(fbc);
if (to_gt(i915)->ggtt->num_fences)
snb_fbc_program_fence(fbc);
intel_de_write(i915, ILK_DPFC_CONTROL(fbc->id),
DPFC_CTL_EN | ivb_dpfc_ctl(fbc));
}
static bool ivb_fbc_is_compressing(struct intel_fbc *fbc)
{
return intel_de_read(fbc->i915, ILK_DPFC_STATUS2(fbc->id)) & DPFC_COMP_SEG_MASK_IVB;
}
static void ivb_fbc_set_false_color(struct intel_fbc *fbc,
bool enable)
{
intel_de_rmw(fbc->i915, ILK_DPFC_CONTROL(fbc->id),
DPFC_CTL_FALSE_COLOR, enable ? DPFC_CTL_FALSE_COLOR : 0);
}
static const struct intel_fbc_funcs ivb_fbc_funcs = {
.activate = ivb_fbc_activate,
.deactivate = ilk_fbc_deactivate,
.is_active = ilk_fbc_is_active,
.is_compressing = ivb_fbc_is_compressing,
.nuke = snb_fbc_nuke,
.program_cfb = ilk_fbc_program_cfb,
.set_false_color = ivb_fbc_set_false_color,
};
static bool intel_fbc_hw_is_active(struct intel_fbc *fbc)
{
return fbc->funcs->is_active(fbc);
}
static void intel_fbc_hw_activate(struct intel_fbc *fbc)
{
trace_intel_fbc_activate(fbc->state.plane);
fbc->active = true;
fbc->activated = true;
fbc->funcs->activate(fbc);
}
static void intel_fbc_hw_deactivate(struct intel_fbc *fbc)
{
trace_intel_fbc_deactivate(fbc->state.plane);
fbc->active = false;
fbc->funcs->deactivate(fbc);
}
static bool intel_fbc_is_compressing(struct intel_fbc *fbc)
{
return fbc->funcs->is_compressing(fbc);
}
static void intel_fbc_nuke(struct intel_fbc *fbc)
{
trace_intel_fbc_nuke(fbc->state.plane);
fbc->funcs->nuke(fbc);
}
static void intel_fbc_activate(struct intel_fbc *fbc)
{
intel_fbc_hw_activate(fbc);
intel_fbc_nuke(fbc);
fbc->no_fbc_reason = NULL;
}
static void intel_fbc_deactivate(struct intel_fbc *fbc, const char *reason)
{
struct drm_i915_private *i915 = fbc->i915;
drm_WARN_ON(&i915->drm, !mutex_is_locked(&fbc->lock));
if (fbc->active)
intel_fbc_hw_deactivate(fbc);
fbc->no_fbc_reason = reason;
}
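/*
 * Highest stolen memory offset the CFB base registers can address:
 * 256MiB on g4x/ilk+, 4GiB on older platforms.
 */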
static u64 intel_fbc_cfb_base_max(struct drm_i915_private *i915)
{
if (DISPLAY_VER(i915) >= 5 || IS_G4X(i915))
return BIT_ULL(28);
else
return BIT_ULL(32);
}
static u64 intel_fbc_stolen_end(struct drm_i915_private *i915)
{
u64 end;
/* The FBC hardware for BDW/SKL doesn't have access to the stolen
* reserved range size, so it always assumes the maximum (8mb) is used.
* If we enable FBC using a CFB on that memory range we'll get FIFO
* underruns, even if that range is not reserved by the BIOS. */
if (IS_BROADWELL(i915) ||
(DISPLAY_VER(i915) == 9 && !IS_BROXTON(i915)))
end = resource_size(&i915->dsm) - 8 * 1024 * 1024;
else
end = U64_MAX;
return min(end, intel_fbc_cfb_base_max(i915));
}
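/*
 * 16bpp framebuffers need at least a 1:2 compression limit,
 * everything else can start at 1:1.
 */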
static int intel_fbc_min_limit(const struct intel_plane_state *plane_state)
{
return plane_state->hw.fb->format->cpp[0] == 2 ? 2 : 1;
}
static int intel_fbc_max_limit(struct drm_i915_private *i915)
{
/* WaFbcOnly1to1Ratio:ctg */
if (IS_G4X(i915))
return 1;
/*
* FBC2 can only do 1:1, 1:2, 1:4, we limit
* FBC1 to the same out of convenience.
*/
return 4;
}
static int find_compression_limit(struct intel_fbc *fbc,
unsigned int size, int min_limit)
{
struct drm_i915_private *i915 = fbc->i915;
u64 end = intel_fbc_stolen_end(i915);
int ret, limit = min_limit;
size /= limit;
/* Try to over-allocate to reduce reallocations and fragmentation. */
ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb,
size <<= 1, 4096, 0, end);
if (ret == 0)
return limit;
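/*
 * The over-allocation didn't fit - fall back to progressively smaller
 * allocations, raising the compression limit as the CFB shrinks, until
 * something fits or we run past the platform maximum limit.
 */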
for (; limit <= intel_fbc_max_limit(i915); limit <<= 1) {
ret = i915_gem_stolen_insert_node_in_range(i915, &fbc->compressed_fb,
size >>= 1, 4096, 0, end);
if (ret == 0)
return limit;
}
return 0;
}
static int intel_fbc_alloc_cfb(struct intel_fbc *fbc,
unsigned int size, int min_limit)
{
struct drm_i915_private *i915 = fbc->i915;
int ret;
drm_WARN_ON(&i915->drm,
drm_mm_node_allocated(&fbc->compressed_fb));
drm_WARN_ON(&i915->drm,
drm_mm_node_allocated(&fbc->compressed_llb));
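/* Pre-g4x FBC also needs a separate line length buffer (LLB) in stolen. */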
if (DISPLAY_VER(i915) < 5 && !IS_G4X(i915)) {
ret = i915_gem_stolen_insert_node(i915, &fbc->compressed_llb,
4096, 4096);
if (ret)
goto err;
}
ret = find_compression_limit(fbc, size, min_limit);
if (!ret)
goto err_llb;
else if (ret > min_limit)
drm_info_once(&i915->drm,
"Reducing the compressed framebuffer size. This may lead to less power savings than a non-reduced-size. Try to increase stolen memory size if available in BIOS.\n");
fbc->limit = ret;
drm_dbg_kms(&i915->drm,
"reserved %llu bytes of contiguous stolen space for FBC, limit: %d\n",
fbc->compressed_fb.size, fbc->limit);
return 0;
err_llb:
if (drm_mm_node_allocated(&fbc->compressed_llb))
i915_gem_stolen_remove_node(i915, &fbc->compressed_llb);
err:
if (drm_mm_initialized(&i915->mm.stolen))
drm_info_once(&i915->drm, "not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size);
return -ENOSPC;
}
static void intel_fbc_program_cfb(struct intel_fbc *fbc)
{
fbc->funcs->program_cfb(fbc);
}
static void __intel_fbc_cleanup_cfb(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
if (WARN_ON(intel_fbc_hw_is_active(fbc)))
return;
if (drm_mm_node_allocated(&fbc->compressed_llb))
i915_gem_stolen_remove_node(i915, &fbc->compressed_llb);
if (drm_mm_node_allocated(&fbc->compressed_fb))
i915_gem_stolen_remove_node(i915, &fbc->compressed_fb);
}
void intel_fbc_cleanup(struct drm_i915_private *i915)
{
struct intel_fbc *fbc;
enum intel_fbc_id fbc_id;
for_each_intel_fbc(i915, fbc, fbc_id) {
mutex_lock(&fbc->lock);
__intel_fbc_cleanup_cfb(fbc);
mutex_unlock(&fbc->lock);
kfree(fbc);
}
}
static bool stride_is_valid(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
const struct drm_framebuffer *fb = plane_state->hw.fb;
unsigned int stride = intel_fbc_plane_stride(plane_state) *
fb->format->cpp[0];
/* This should have been caught earlier. */
if (drm_WARN_ON_ONCE(&i915->drm, (stride & (64 - 1)) != 0))
return false;
/* Below are the additional FBC restrictions. */
if (stride < 512)
return false;
if (DISPLAY_VER(i915) == 2 || DISPLAY_VER(i915) == 3)
return stride == 4096 || stride == 8192;
if (DISPLAY_VER(i915) == 4 && !IS_G4X(i915) && stride < 2048)
return false;
/* Display WA #1105: skl,bxt,kbl,cfl,glk */
if ((DISPLAY_VER(i915) == 9 || IS_GEMINILAKE(i915)) &&
fb->modifier == DRM_FORMAT_MOD_LINEAR && stride & 511)
return false;
if (stride > 16384)
return false;
return true;
}
static bool pixel_format_is_valid(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
const struct drm_framebuffer *fb = plane_state->hw.fb;
switch (fb->format->format) {
case DRM_FORMAT_XRGB8888:
case DRM_FORMAT_XBGR8888:
return true;
case DRM_FORMAT_XRGB1555:
case DRM_FORMAT_RGB565:
/* 16bpp not supported on gen2 */
if (DISPLAY_VER(i915) == 2)
return false;
/* WaFbcOnly1to1Ratio:ctg */
if (IS_G4X(i915))
return false;
return true;
default:
return false;
}
}
static bool rotation_is_valid(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
const struct drm_framebuffer *fb = plane_state->hw.fb;
unsigned int rotation = plane_state->hw.rotation;
if (DISPLAY_VER(i915) >= 9 && fb->format->format == DRM_FORMAT_RGB565 &&
drm_rotation_90_or_270(rotation))
return false;
else if (DISPLAY_VER(i915) <= 4 && !IS_G4X(i915) &&
rotation != DRM_MODE_ROTATE_0)
return false;
return true;
}
/*
* For some reason, the hardware tracking starts looking at whatever we
* programmed as the display plane base address register. It does not look at
* the X and Y offset registers. That's why we include the src x/y offsets
* instead of just looking at the plane size.
*/
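/*
 * e.g. (illustrative) a 3840 pixel wide plane with a 512 pixel src x
 * offset has an effective width of 4352 and would exceed a 4096 pixel
 * hardware limit.
 */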
static bool intel_fbc_hw_tracking_covers_screen(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
unsigned int effective_w, effective_h, max_w, max_h;
if (DISPLAY_VER(i915) >= 10) {
max_w = 5120;
max_h = 4096;
} else if (DISPLAY_VER(i915) >= 8 || IS_HASWELL(i915)) {
max_w = 4096;
max_h = 4096;
} else if (IS_G4X(i915) || DISPLAY_VER(i915) >= 5) {
max_w = 4096;
max_h = 2048;
} else {
max_w = 2048;
max_h = 1536;
}
effective_w = plane_state->view.color_plane[0].x +
(drm_rect_width(&plane_state->uapi.src) >> 16);
effective_h = plane_state->view.color_plane[0].y +
(drm_rect_height(&plane_state->uapi.src) >> 16);
return effective_w <= max_w && effective_h <= max_h;
}
static bool tiling_is_valid(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
const struct drm_framebuffer *fb = plane_state->hw.fb;
switch (fb->modifier) {
case DRM_FORMAT_MOD_LINEAR:
case I915_FORMAT_MOD_Y_TILED:
case I915_FORMAT_MOD_Yf_TILED:
return DISPLAY_VER(i915) >= 9;
case I915_FORMAT_MOD_4_TILED:
case I915_FORMAT_MOD_X_TILED:
return true;
default:
return false;
}
}
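/*
* Snapshot the parameters the later hardware programming needs into
* fbc->state: the owning plane, the FBC1 compression interval, the fence
* id/offset and the CFB stride/size derived from the new plane state.
*/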
static void intel_fbc_update_state(struct intel_atomic_state *state,
struct intel_crtc *crtc,
struct intel_plane *plane)
{
struct drm_i915_private *i915 = to_i915(state->base.dev);
const struct intel_crtc_state *crtc_state =
intel_atomic_get_new_crtc_state(state, crtc);
const struct intel_plane_state *plane_state =
intel_atomic_get_new_plane_state(state, plane);
struct intel_fbc *fbc = plane->fbc;
struct intel_fbc_state *fbc_state = &fbc->state;
drm_WARN_ON(&i915->drm, plane_state->no_fbc_reason);
fbc_state->plane = plane;
/* FBC1 compression interval: arbitrary choice of 1 second */
fbc_state->interval = drm_mode_vrefresh(&crtc_state->hw.adjusted_mode);
fbc_state->fence_y_offset = intel_plane_fence_y_offset(plane_state);
drm_WARN_ON(&i915->drm, plane_state->flags & PLANE_HAS_FENCE &&
!plane_state->ggtt_vma->fence);
if (plane_state->flags & PLANE_HAS_FENCE &&
plane_state->ggtt_vma->fence)
fbc_state->fence_id = plane_state->ggtt_vma->fence->id;
else
fbc_state->fence_id = -1;
fbc_state->cfb_stride = intel_fbc_cfb_stride(plane_state);
fbc_state->cfb_size = intel_fbc_cfb_size(plane_state);
fbc_state->override_cfb_stride = intel_fbc_override_cfb_stride(plane_state);
}
static bool intel_fbc_is_fence_ok(const struct intel_plane_state *plane_state)
{
struct drm_i915_private *i915 = to_i915(plane_state->uapi.plane->dev);
/* The use of a CPU fence is one of two ways to detect writes by the
* CPU to the scanout and trigger updates to the FBC.
*
* The other method is software tracking (see
* intel_fbc_invalidate/flush()), which manually notifies FBC and nukes
* the current compressed buffer so that it gets recompressed.
*
* Note that it is possible for a tiled surface to be unmappable (and
* so have no fence associated with it) due to aperture constraints
* at the time of pinning.
*
* FIXME with 90/270 degree rotation we should use the fence on
* the normal GTT view (the rotated view doesn't even have a
* fence). Would need changes to the FBC fence Y offset as well.
* For now this will effectively disable FBC with 90/270 degree
* rotation.
*/
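/*
* On display version 9+ the software tracking alone is considered good
* enough, so the fence becomes a nice-to-have rather than a requirement.
*/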
return DISPLAY_VER(i915) >= 9 ||
(plane_state->flags & PLANE_HAS_FENCE &&
plane_state->ggtt_vma->fence);
}
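/*
* Check that the compressed framebuffer already carved out of stolen
* memory is still large enough for the new plane state at the currently
* selected compression limit.
*/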
static bool intel_fbc_is_cfb_ok(const struct intel_plane_state *plane_state)
{
struct intel_plane *plane = to_intel_plane(plane_state->uapi.plane);
struct intel_fbc *fbc = plane->fbc;
return intel_fbc_min_limit(plane_state) <= fbc->limit &&
intel_fbc_cfb_size(plane_state) <= fbc->compressed_fb.size * fbc->limit;
}
static bool intel_fbc_is_ok(const struct intel_plane_state *plane_state)
{
return !plane_state->no_fbc_reason &&
intel_fbc_is_fence_ok(plane_state) &&
intel_fbc_is_cfb_ok(plane_state);
}
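/*
* Atomic check for a single plane: rather than failing the commit, record
* a human readable reason in plane_state->no_fbc_reason whenever FBC
* cannot be used; a non-zero return is reserved for real atomic check
* errors (e.g. a failed cdclk state lookup).
*/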
static int intel_fbc_check_plane(struct intel_atomic_state *state,
struct intel_plane *plane)
{
struct drm_i915_private *i915 = to_i915(state->base.dev);
struct intel_plane_state *plane_state =
intel_atomic_get_new_plane_state(state, plane);
const struct drm_framebuffer *fb = plane_state->hw.fb;
struct intel_crtc *crtc = to_intel_crtc(plane_state->uapi.crtc);
const struct intel_crtc_state *crtc_state;
struct intel_fbc *fbc = plane->fbc;
if (!fbc)
return 0;
if (intel_vgpu_active(i915)) {
plane_state->no_fbc_reason = "VGPU active";
return 0;
}
if (!i915->params.enable_fbc) {
plane_state->no_fbc_reason = "disabled per module param or by default";
return 0;
}
if (!plane_state->uapi.visible) {
plane_state->no_fbc_reason = "plane not visible";
return 0;
}
crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
if (crtc_state->hw.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) {
plane_state->no_fbc_reason = "interlaced mode not supported";
return 0;
}
if (crtc_state->double_wide) {
plane_state->no_fbc_reason = "double wide pipe not supported";
return 0;
}
/*
* FBC is not supported in combination with PSR2 on display 12+.
* The recommendation is to keep this combination disabled.
* Bspec: 50422 HSD: 14010260002
*/
if (DISPLAY_VER(i915) >= 12 && crtc_state->has_psr2) {
plane_state->no_fbc_reason = "PSR2 enabled";
return 0;
}
if (!pixel_format_is_valid(plane_state)) {
plane_state->no_fbc_reason = "pixel format not supported";
return 0;
}
if (!tiling_is_valid(plane_state)) {
plane_state->no_fbc_reason = "tiling not supported";
return 0;
}
if (!rotation_is_valid(plane_state)) {
plane_state->no_fbc_reason = "rotation not supported";
return 0;
}
if (!stride_is_valid(plane_state)) {
plane_state->no_fbc_reason = "stride not supported";
return 0;
}
if (plane_state->hw.pixel_blend_mode != DRM_MODE_BLEND_PIXEL_NONE &&
fb->format->has_alpha) {
plane_state->no_fbc_reason = "per-pixel alpha not supported";
return 0;
}
if (!intel_fbc_hw_tracking_covers_screen(plane_state)) {
plane_state->no_fbc_reason = "plane size too big";
return 0;
}
/*
* Work around a problem on GEN9+ HW, where enabling FBC on a plane
* having a Y offset that isn't divisible by 4 causes FIFO underrun
* and screen flicker.
*/
if (DISPLAY_VER(i915) >= 9 &&
plane_state->view.color_plane[0].y & 3) {
plane_state->no_fbc_reason = "plane start Y offset misaligned";
return 0;
}
/* Wa_22010751166: icl, ehl, tgl, dg1, rkl */
if (DISPLAY_VER(i915) >= 11 &&
(plane_state->view.color_plane[0].y +
(drm_rect_height(&plane_state->uapi.src) >> 16)) & 3) {
plane_state->no_fbc_reason = "plane end Y offset misaligned";
return 0;
}
/* WaFbcExceedCdClockThreshold:hsw,bdw */
if (IS_HASWELL(i915) || IS_BROADWELL(i915)) {
const struct intel_cdclk_state *cdclk_state;
cdclk_state = intel_atomic_get_cdclk_state(state);
if (IS_ERR(cdclk_state))
return PTR_ERR(cdclk_state);
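/*
* Reject FBC once the pipe pixel rate reaches 95% of the logical cdclk,
* e.g. with a 450000 kHz cdclk anything at or above 427500 kHz is
* considered too fast.
*/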
if (crtc_state->pixel_rate >= cdclk_state->logical.cdclk * 95 / 100) {
plane_state->no_fbc_reason = "pixel rate too high";
return 0;
}
}
plane_state->no_fbc_reason = NULL;
return 0;
}
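/*
* A flip can be handled as a "nuke" (FBC stays enabled and the compressed
* framebuffer is simply recompressed) only when the old and new plane
* states are compatible: no modeset, both acceptable for FBC, and
* identical pixel format, modifier, plane stride and CFB layout.
*/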
static bool intel_fbc_can_flip_nuke(struct intel_atomic_state *state,
struct intel_crtc *crtc,
struct intel_plane *plane)
{
const struct intel_crtc_state *new_crtc_state =
intel_atomic_get_new_crtc_state(state, crtc);
const struct intel_plane_state *old_plane_state =
intel_atomic_get_old_plane_state(state, plane);
const struct intel_plane_state *new_plane_state =
intel_atomic_get_new_plane_state(state, plane);
const struct drm_framebuffer *old_fb = old_plane_state->hw.fb;
const struct drm_framebuffer *new_fb = new_plane_state->hw.fb;
if (drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi))
return false;
if (!intel_fbc_is_ok(old_plane_state) ||
!intel_fbc_is_ok(new_plane_state))
return false;
if (old_fb->format->format != new_fb->format->format)
return false;
if (old_fb->modifier != new_fb->modifier)
return false;
if (intel_fbc_plane_stride(old_plane_state) !=
intel_fbc_plane_stride(new_plane_state))
return false;
if (intel_fbc_cfb_stride(old_plane_state) !=
intel_fbc_cfb_stride(new_plane_state))
return false;
if (intel_fbc_cfb_size(old_plane_state) !=
intel_fbc_cfb_size(new_plane_state))
return false;
if (intel_fbc_override_cfb_stride(old_plane_state) !=
intel_fbc_override_cfb_stride(new_plane_state))
return false;
return true;
}
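/*
* Called before the plane update is committed: if the pending flip cannot
* be handled as a nuke, deactivate FBC and tell the caller whether an
* extra vblank wait (Display WA #1198) is needed before touching the
* plane registers.
*/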
static bool __intel_fbc_pre_update(struct intel_atomic_state *state,
struct intel_crtc *crtc,
struct intel_plane *plane)
{
struct drm_i915_private *i915 = to_i915(state->base.dev);
struct intel_fbc *fbc = plane->fbc;
bool need_vblank_wait = false;
fbc->flip_pending = true;
if (intel_fbc_can_flip_nuke(state, crtc, plane))
return need_vblank_wait;
intel_fbc_deactivate(fbc, "update pending");
/*
* Display WA #1198: glk+
* Need an extra vblank wait between FBC disable and most plane
* updates. Bspec says this is only needed for plane disable, but
* that is not true. Touching most plane registers will cause the
* corruption to appear. Also SKL/derivatives do not seem to be
* affected.
*
* TODO: could optimize this a bit by sampling the frame
* counter when we disable FBC (if it was already done earlier)
* and skipping the extra vblank wait before the plane update
* if at least one frame has already passed.
*/
if (fbc->activated && DISPLAY_VER(i915) >= 10)
need_vblank_wait = true;
fbc->activated = false;
return need_vblank_wait;
}
bool intel_fbc_pre_update(struct intel_atomic_state *state,
struct intel_crtc *crtc)
{
const struct intel_plane_state *plane_state;
bool need_vblank_wait = false;
struct intel_plane *plane;
int i;
for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
struct intel_fbc *fbc = plane->fbc;
if (!fbc || plane->pipe != crtc->pipe)
continue;
mutex_lock(&fbc->lock);
if (fbc->state.plane == plane)
need_vblank_wait |= __intel_fbc_pre_update(state, crtc, plane);
mutex_unlock(&fbc->lock);
}
return need_vblank_wait;
}
static void __intel_fbc_disable(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
struct intel_plane *plane = fbc->state.plane;
drm_WARN_ON(&i915->drm, !mutex_is_locked(&fbc->lock));
drm_WARN_ON(&i915->drm, fbc->active);
drm_dbg_kms(&i915->drm, "Disabling FBC on [PLANE:%d:%s]\n",
plane->base.base.id, plane->base.name);
__intel_fbc_cleanup_cfb(fbc);
fbc->state.plane = NULL;
}
static void __intel_fbc_post_update(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
drm_WARN_ON(&i915->drm, !mutex_is_locked(&fbc->lock));
if (!fbc->busy_bits)
intel_fbc_activate(fbc);
else
intel_fbc_deactivate(fbc, "frontbuffer write");
}
void intel_fbc_post_update(struct intel_atomic_state *state,
struct intel_crtc *crtc)
{
const struct intel_plane_state *plane_state;
struct intel_plane *plane;
int i;
for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
struct intel_fbc *fbc = plane->fbc;
if (!fbc || plane->pipe != crtc->pipe)
continue;
mutex_lock(&fbc->lock);
if (fbc->state.plane == plane) {
fbc->flip_pending = false;
__intel_fbc_post_update(fbc);
}
mutex_unlock(&fbc->lock);
}
}
static unsigned int intel_fbc_get_frontbuffer_bit(struct intel_fbc *fbc)
{
if (fbc->state.plane)
return fbc->state.plane->frontbuffer_bit;
else
return fbc->possible_framebuffer_bits;
}
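/*
* Software frontbuffer tracking: invalidate marks the affected frontbuffer
* bits busy and deactivates FBC, while flush clears them again and
* reactivates (or nukes) FBC once the CPU writes to the scanout are done.
*/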
static void __intel_fbc_invalidate(struct intel_fbc *fbc,
unsigned int frontbuffer_bits,
enum fb_op_origin origin)
{
if (origin == ORIGIN_FLIP || origin == ORIGIN_CURSOR_UPDATE)
return;
mutex_lock(&fbc->lock);
fbc->busy_bits |= intel_fbc_get_frontbuffer_bit(fbc) & frontbuffer_bits;
if (fbc->state.plane && fbc->busy_bits)
intel_fbc_deactivate(fbc, "frontbuffer write");
mutex_unlock(&fbc->lock);
}
void intel_fbc_invalidate(struct drm_i915_private *i915,
unsigned int frontbuffer_bits,
enum fb_op_origin origin)
{
struct intel_fbc *fbc;
enum intel_fbc_id fbc_id;
for_each_intel_fbc(i915, fbc, fbc_id)
__intel_fbc_invalidate(fbc, frontbuffer_bits, origin);
}
static void __intel_fbc_flush(struct intel_fbc *fbc,
unsigned int frontbuffer_bits,
enum fb_op_origin origin)
{
mutex_lock(&fbc->lock);
fbc->busy_bits &= ~frontbuffer_bits;
if (origin == ORIGIN_FLIP || origin == ORIGIN_CURSOR_UPDATE)
goto out;
if (!fbc->busy_bits && fbc->state.plane &&
(frontbuffer_bits & intel_fbc_get_frontbuffer_bit(fbc))) {
if (fbc->active)
intel_fbc_nuke(fbc);
else if (!fbc->flip_pending)
__intel_fbc_post_update(fbc);
}
out:
mutex_unlock(&fbc->lock);
}
void intel_fbc_flush(struct drm_i915_private *i915,
unsigned int frontbuffer_bits,
enum fb_op_origin origin)
{
struct intel_fbc *fbc;
enum intel_fbc_id fbc_id;
for_each_intel_fbc(i915, fbc, fbc_id)
__intel_fbc_flush(fbc, frontbuffer_bits, origin);
}
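/*
* Atomic check hook: run the per-plane FBC checks for every plane in the
* state so that no_fbc_reason is up to date before the commit.
*/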
int intel_fbc_atomic_check(struct intel_atomic_state *state)
{
struct intel_plane_state *plane_state;
struct intel_plane *plane;
int i;
for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
int ret;
ret = intel_fbc_check_plane(state, plane);
if (ret)
return ret;
}
return 0;
}
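/*
* Try to enable FBC on the given plane: reuse the current state if it is
* still compatible, otherwise disable first, allocate the CFB from stolen
* memory and program it; fbc->no_fbc_reason records why enabling failed.
*/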
static void __intel_fbc_enable(struct intel_atomic_state *state,
struct intel_crtc *crtc,
struct intel_plane *plane)
{
struct drm_i915_private *i915 = to_i915(state->base.dev);
const struct intel_plane_state *plane_state =
intel_atomic_get_new_plane_state(state, plane);
struct intel_fbc *fbc = plane->fbc;
if (fbc->state.plane) {
if (fbc->state.plane != plane)
return;
if (intel_fbc_is_ok(plane_state)) {
intel_fbc_update_state(state, crtc, plane);
return;
}
__intel_fbc_disable(fbc);
}
drm_WARN_ON(&i915->drm, fbc->active);
fbc->no_fbc_reason = plane_state->no_fbc_reason;
if (fbc->no_fbc_reason)
return;
if (!intel_fbc_is_fence_ok(plane_state)) {
fbc->no_fbc_reason = "framebuffer not fenced";
return;
}
if (fbc->underrun_detected) {
fbc->no_fbc_reason = "FIFO underrun";
return;
}
if (intel_fbc_alloc_cfb(fbc, intel_fbc_cfb_size(plane_state),
intel_fbc_min_limit(plane_state))) {
fbc->no_fbc_reason = "not enough stolen memory";
return;
}
drm_dbg_kms(&i915->drm, "Enabling FBC on [PLANE:%d:%s]\n",
plane->base.base.id, plane->base.name);
fbc->no_fbc_reason = "FBC enabled but not active yet\n";
intel_fbc_update_state(state, crtc, plane);
intel_fbc_program_cfb(fbc);
}
/**
* intel_fbc_disable - disable FBC if it's associated with crtc
* @crtc: the CRTC
*
* This function disables FBC if it's associated with the provided CRTC.
*/
void intel_fbc_disable(struct intel_crtc *crtc)
{
struct drm_i915_private *i915 = to_i915(crtc->base.dev);
struct intel_plane *plane;
for_each_intel_plane(&i915->drm, plane) {
struct intel_fbc *fbc = plane->fbc;
if (!fbc || plane->pipe != crtc->pipe)
continue;
mutex_lock(&fbc->lock);
if (fbc->state.plane == plane)
__intel_fbc_disable(fbc);
mutex_unlock(&fbc->lock);
}
}
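/*
* Re-evaluate FBC for every plane on the CRTC after an atomic update:
* when a pipe update has left the plane with a no_fbc_reason the plane is
* dropped from FBC, otherwise __intel_fbc_enable() tries to (re)enable it.
*/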
void intel_fbc_update(struct intel_atomic_state *state,
struct intel_crtc *crtc)
{
const struct intel_crtc_state *crtc_state =
intel_atomic_get_new_crtc_state(state, crtc);
const struct intel_plane_state *plane_state;
struct intel_plane *plane;
int i;
for_each_new_intel_plane_in_state(state, plane, plane_state, i) {
struct intel_fbc *fbc = plane->fbc;
if (!fbc || plane->pipe != crtc->pipe)
continue;
mutex_lock(&fbc->lock);
if (crtc_state->update_pipe && plane_state->no_fbc_reason) {
if (fbc->state.plane == plane)
__intel_fbc_disable(fbc);
} else {
__intel_fbc_enable(state, crtc, plane);
}
mutex_unlock(&fbc->lock);
}
}
/**
* intel_fbc_global_disable - globally disable FBC
* @i915: i915 device instance
*
* This function disables FBC regardless of which CRTC is associated with it.
*/
void intel_fbc_global_disable(struct drm_i915_private *i915)
{
struct intel_fbc *fbc;
enum intel_fbc_id fbc_id;
for_each_intel_fbc(i915, fbc, fbc_id) {
mutex_lock(&fbc->lock);
if (fbc->state.plane)
__intel_fbc_disable(fbc);
mutex_unlock(&fbc->lock);
}
}
static void intel_fbc_underrun_work_fn(struct work_struct *work)
{
struct intel_fbc *fbc = container_of(work, typeof(*fbc), underrun_work);
struct drm_i915_private *i915 = fbc->i915;
mutex_lock(&fbc->lock);
/* Maybe we were scheduled twice. */
if (fbc->underrun_detected || !fbc->state.plane)
goto out;
drm_dbg_kms(&i915->drm, "Disabling FBC due to FIFO underrun.\n");
fbc->underrun_detected = true;
intel_fbc_deactivate(fbc, "FIFO underrun");
if (!fbc->flip_pending)
intel_crtc_wait_for_next_vblank(intel_crtc_for_pipe(i915, fbc->state.plane->pipe));
__intel_fbc_disable(fbc);
out:
mutex_unlock(&fbc->lock);
}
static void __intel_fbc_reset_underrun(struct intel_fbc *fbc)
{
struct drm_i915_private *i915 = fbc->i915;
cancel_work_sync(&fbc->underrun_work);
mutex_lock(&fbc->lock);
if (fbc->underrun_detected) {
drm_dbg_kms(&i915->drm,
"Re-allowing FBC after fifo underrun\n");
fbc->no_fbc_reason = "FIFO underrun cleared";
}
fbc->underrun_detected = false;
mutex_unlock(&fbc->lock);
}
/*
* intel_fbc_reset_underrun - reset FBC fifo underrun status.
* @i915: the i915 device
*
* See intel_fbc_handle_fifo_underrun_irq(). For automated testing we
* want to re-enable FBC after an underrun to increase test coverage.
*/
void intel_fbc_reset_underrun(struct drm_i915_private *i915)
{
struct intel_fbc *fbc;
enum intel_fbc_id fbc_id;
for_each_intel_fbc(i915, fbc, fbc_id)
__intel_fbc_reset_underrun(fbc);
}
static void __intel_fbc_handle_fifo_underrun_irq(struct intel_fbc *fbc)
{
/*
* There's no guarantee that underrun_detected won't be set to true
* right after this check and before the work is scheduled, but that's
* not a problem since we'll check it again under the work function
* while FBC is locked. This check here is just to prevent us from
* unnecessarily scheduling the work, and it relies on the fact that we
 * never switch underrun_detected back to false after it's true.
*/
if (READ_ONCE(fbc->underrun_detected))
return;
schedule_work(&fbc->underrun_work);
}
/**
* intel_fbc_handle_fifo_underrun_irq - disable FBC when we get a FIFO underrun
* @i915: i915 device
*
* Without FBC, most underruns are harmless and don't really cause too many
* problems, except for an annoying message on dmesg. With FBC, underruns can
* become black screens or even worse, especially when paired with bad
* watermarks. So in order for us to be on the safe side, completely disable FBC
* in case we ever detect a FIFO underrun on any pipe. An underrun on any pipe
* already suggests that watermarks may be bad, so try to be as safe as
* possible.
*
* This function is called from the IRQ handler.
*/
void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *i915)
{
struct intel_fbc *fbc;
enum intel_fbc_id fbc_id;
for_each_intel_fbc(i915, fbc, fbc_id)
__intel_fbc_handle_fifo_underrun_irq(fbc);
}
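
/*
 * Purely illustrative sketch, not part of this driver: the FIFO underrun
 * reporting code elsewhere in i915 is expected to forward the event roughly
 * as below (the caller name here is made up). Since the handler above only
 * schedules a worker, calling it from hard IRQ context is safe.
 *
 *	static void example_underrun_irq(struct drm_i915_private *i915)
 *	{
 *		...
 *		intel_fbc_handle_fifo_underrun_irq(i915);
 *	}
 */
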
/*
* The DDX driver changes its behavior depending on the value it reads from
* i915.enable_fbc, so sanitize it by translating the default value into either
* 0 or 1 in order to allow it to know what's going on.
*
* Notice that this is done at driver initialization and we still allow user
* space to change the value during runtime without sanitizing it again. IGT
* relies on being able to change i915.enable_fbc at runtime.
*/
static int intel_sanitize_fbc_option(struct drm_i915_private *i915)
{
if (i915->params.enable_fbc >= 0)
return !!i915->params.enable_fbc;
if (!HAS_FBC(i915))
return 0;
if (IS_BROADWELL(i915) || DISPLAY_VER(i915) >= 9)
return 1;
return 0;
}
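
/*
 * Worked example of the sanitization above, derived directly from the
 * checks: with the default i915.enable_fbc=-1, Broadwell and anything with
 * DISPLAY_VER >= 9 end up with enable_fbc=1, while everything else (or
 * hardware without FBC at all) ends up with enable_fbc=0. An explicit
 * non-negative value from the user is simply normalized to 0 or 1.
 */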
static bool need_fbc_vtd_wa(struct drm_i915_private *i915)
{
/* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */
if (intel_vtd_active(i915) &&
(IS_SKYLAKE(i915) || IS_BROXTON(i915))) {
drm_info(&i915->drm,
"Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n");
return true;
}
return false;
}
void intel_fbc_add_plane(struct intel_fbc *fbc, struct intel_plane *plane)
{
if (!fbc)
return;
plane->fbc = fbc;
fbc->possible_framebuffer_bits |= plane->frontbuffer_bit;
}
static struct intel_fbc *intel_fbc_create(struct drm_i915_private *i915,
enum intel_fbc_id fbc_id)
{
struct intel_fbc *fbc;
fbc = kzalloc(sizeof(*fbc), GFP_KERNEL);
if (!fbc)
return NULL;
fbc->id = fbc_id;
fbc->i915 = i915;
INIT_WORK(&fbc->underrun_work, intel_fbc_underrun_work_fn);
mutex_init(&fbc->lock);
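	/*
	 * Select the platform specific register programming vtable, newest
	 * platforms first. Note that IS_G4X() is tested before the generic
	 * display version 4 case so that G4X gets its own vtable instead of
	 * falling through to the i965 one.
	 */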
if (DISPLAY_VER(i915) >= 7)
fbc->funcs = &ivb_fbc_funcs;
else if (DISPLAY_VER(i915) == 6)
fbc->funcs = &snb_fbc_funcs;
else if (DISPLAY_VER(i915) == 5)
fbc->funcs = &ilk_fbc_funcs;
else if (IS_G4X(i915))
fbc->funcs = &g4x_fbc_funcs;
else if (DISPLAY_VER(i915) == 4)
fbc->funcs = &i965_fbc_funcs;
else
fbc->funcs = &i8xx_fbc_funcs;
return fbc;
}
/**
* intel_fbc_init - Initialize FBC
* @i915: the i915 device
*
* This function might be called during PM init process.
*/
void intel_fbc_init(struct drm_i915_private *i915)
{
enum intel_fbc_id fbc_id;
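	/*
	 * The compressed framebuffer lives in stolen memory, so FBC cannot
	 * be used at all without initialized stolen memory; the VT-d
	 * workaround below likewise forces it off.
	 */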
if (!drm_mm_initialized(&i915->mm.stolen))
mkwrite_device_info(i915)->display.fbc_mask = 0;
if (need_fbc_vtd_wa(i915))
mkwrite_device_info(i915)->display.fbc_mask = 0;
i915->params.enable_fbc = intel_sanitize_fbc_option(i915);
drm_dbg_kms(&i915->drm, "Sanitized enable_fbc value: %d\n",
i915->params.enable_fbc);
for_each_fbc_id(i915, fbc_id) {
struct intel_fbc *fbc;
fbc = intel_fbc_create(i915, fbc_id);
if (!fbc)
continue;
/*
* We still don't have any sort of hardware state readout
* for FBC, so deactivate it in case the BIOS activated it
* to make sure software matches the hardware state.
*/
if (intel_fbc_hw_is_active(fbc))
intel_fbc_hw_deactivate(fbc);
i915->fbc[fbc->id] = fbc;
}
}
static int intel_fbc_debugfs_status_show(struct seq_file *m, void *unused)
{
struct intel_fbc *fbc = m->private;
struct drm_i915_private *i915 = fbc->i915;
struct intel_plane *plane;
intel_wakeref_t wakeref;
drm_modeset_lock_all(&i915->drm);
wakeref = intel_runtime_pm_get(&i915->runtime_pm);
mutex_lock(&fbc->lock);
if (fbc->active) {
seq_puts(m, "FBC enabled\n");
seq_printf(m, "Compressing: %s\n",
str_yes_no(intel_fbc_is_compressing(fbc)));
} else {
seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason);
}
for_each_intel_plane(&i915->drm, plane) {
const struct intel_plane_state *plane_state =
to_intel_plane_state(plane->base.state);
if (plane->fbc != fbc)
continue;
seq_printf(m, "%c [PLANE:%d:%s]: %s\n",
fbc->state.plane == plane ? '*' : ' ',
plane->base.base.id, plane->base.name,
plane_state->no_fbc_reason ?: "FBC possible");
}
mutex_unlock(&fbc->lock);
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
drm_modeset_unlock_all(&i915->drm);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(intel_fbc_debugfs_status);
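
/*
 * Illustrative output of the status file above (plane ids, names and the
 * "no scaling" reason are made-up values, the layout mirrors the
 * seq_printf() calls):
 *
 *	FBC enabled
 *	Compressing: yes
 *	* [PLANE:31:plane 1A]: FBC possible
 *	  [PLANE:36:plane 1B]: no scaling
 */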
static int intel_fbc_debugfs_false_color_get(void *data, u64 *val)
{
struct intel_fbc *fbc = data;
*val = fbc->false_color;
return 0;
}
static int intel_fbc_debugfs_false_color_set(void *data, u64 val)
{
struct intel_fbc *fbc = data;
mutex_lock(&fbc->lock);
fbc->false_color = val;
if (fbc->active)
fbc->funcs->set_false_color(fbc, fbc->false_color);
mutex_unlock(&fbc->lock);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(intel_fbc_debugfs_false_color_fops,
intel_fbc_debugfs_false_color_get,
intel_fbc_debugfs_false_color_set,
"%llu\n");
static void intel_fbc_debugfs_add(struct intel_fbc *fbc,
struct dentry *parent)
{
debugfs_create_file("i915_fbc_status", 0444, parent,
fbc, &intel_fbc_debugfs_status_fops);
if (fbc->funcs->set_false_color)
debugfs_create_file("i915_fbc_false_color", 0644, parent,
fbc, &intel_fbc_debugfs_false_color_fops);
}
void intel_fbc_crtc_debugfs_add(struct intel_crtc *crtc)
{
struct intel_plane *plane = to_intel_plane(crtc->base.primary);
if (plane->fbc)
intel_fbc_debugfs_add(plane->fbc, crtc->base.debugfs_entry);
}
/* FIXME: remove this once igt is on board with per-crtc stuff */
void intel_fbc_debugfs_register(struct drm_i915_private *i915)
{
struct drm_minor *minor = i915->drm.primary;
struct intel_fbc *fbc;
fbc = i915->fbc[INTEL_FBC_A];
if (fbc)
intel_fbc_debugfs_add(fbc, minor->debugfs_root);
}
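
/*
 * For reference, and purely indicative since the exact paths depend on the
 * DRM minor and CRTC index: the per-CRTC hooks above typically surface as
 * /sys/kernel/debug/dri/0/crtc-0/i915_fbc_status and, when the vtable
 * supports it, /sys/kernel/debug/dri/0/crtc-0/i915_fbc_false_color, e.g.
 *
 *	echo 1 > /sys/kernel/debug/dri/0/crtc-0/i915_fbc_false_color
 *
 * while intel_fbc_debugfs_register() keeps a device-level i915_fbc_status
 * entry for FBC A until IGT has moved to the per-CRTC files.
 */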