linux/drivers/video/fbdev/core/cfbimgblt.c
Thomas Zimmermann 2a81dba4b5 fbdev: Fix cfb_imageblit() for arbitrary image widths
Commit 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
broke cfb_imageblit() for image widths that are not aligned to 8-bit
boundaries. Fix this by handling the trailing pixels on each line
separately. The performance improvements in the original commit do not
regress by this change.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 0d03011894d2 ("fbdev: Improve performance of cfb_imageblit()")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Sam Ravnborg <sam@ravnborg.org>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
Tested-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220313192952.12058-3-tzimmermann@suse.de
2022-03-25 09:55:54 +10:00

367 lines
9.6 KiB
C

/*
* Generic BitBLT function for frame buffer with packed pixels of any depth.
*
* Copyright (C) June 1999 James Simmons
*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file COPYING in the main directory of this archive for
* more details.
*
* NOTES:
*
* This function copies an image from system memory to video memory. The
* image can be a bitmap where each 0 represents the background color and
* each 1 represents the foreground color. Great for font handling. It can
* also be a color image. This is determined by image_depth. The color image
* must be laid out exactly in the same format as the framebuffer. Yes I know
* there are cards with hardware that converts images of various depths to the
* framebuffer depth. But not every card has this. All images must be rounded
* up to the nearest byte. For example a bitmap 12 bits wide must be two
* bytes width.
*
* Tony:
* Incorporate mask tables similar to fbcon-cfb*.c in 2.4 API. This speeds
* up the code significantly.
*
* Code for depths not multiples of BITS_PER_LONG is still kludgy, which is
* still processed a bit at a time.
*
* Also need to add code to deal with cards whose endianness differs from
* that of the native CPU. I also need to deal with MSB position in the word.
*/
#include <linux/module.h>
#include <linux/string.h>
#include <linux/fb.h>
#include <asm/types.h>
#include "fb_draw.h"
/*
 * Debug output helper. DEBUG is defined unconditionally on the next line,
 * so the printk() flavour of DPRINTK() is always the one that gets selected;
 * it prefixes every message with the name of the calling function.
 */
#define DEBUG
#ifdef DEBUG
#define DPRINTK(fmt, args...) printk(KERN_DEBUG "%s: " fmt,__func__,## args)
#else
#define DPRINTK(fmt, args...)
#endif
/*
 * 8 bpp pixel masks, selected by fast_imageblit() when fb_be_math(p) is
 * true. Entry i expands the four low bits of i into four bytes: bit 3 of i
 * selects 0xff000000, bit 0 of i selects 0x000000ff.
 */
static const u32 cfb_tab8_be[] = {
0x00000000,0x000000ff,0x0000ff00,0x0000ffff,
0x00ff0000,0x00ff00ff,0x00ffff00,0x00ffffff,
0xff000000,0xff0000ff,0xff00ff00,0xff00ffff,
0xffff0000,0xffff00ff,0xffffff00,0xffffffff
};
/*
 * 8 bpp pixel masks, selected by fast_imageblit() when fb_be_math(p) is
 * false. Entry i expands the four low bits of i into four bytes in the
 * opposite byte order of cfb_tab8_be: bit 3 of i selects 0x000000ff, bit 0
 * of i selects 0xff000000.
 */
static const u32 cfb_tab8_le[] = {
0x00000000,0xff000000,0x00ff0000,0xffff0000,
0x0000ff00,0xff00ff00,0x00ffff00,0xffffff00,
0x000000ff,0xff0000ff,0x00ff00ff,0xffff00ff,
0x0000ffff,0xff00ffff,0x00ffffff,0xffffffff
};
/*
 * 16 bpp pixel masks, selected by fast_imageblit() when fb_be_math(p) is
 * true. Bit 1 of the index selects the upper half-word, bit 0 the lower one.
 */
static const u32 cfb_tab16_be[] = {
0x00000000, 0x0000ffff, 0xffff0000, 0xffffffff
};
/*
 * 16 bpp pixel masks, selected by fast_imageblit() when fb_be_math(p) is
 * false. Bit 1 of the index selects the lower half-word, bit 0 the upper one.
 */
static const u32 cfb_tab16_le[] = {
0x00000000, 0xffff0000, 0x0000ffff, 0xffffffff
};
/*
 * 32 bpp pixel masks used by fast_imageblit(): one pixel per word, so the
 * single index bit selects either no bits or all bits.
 */
static const u32 cfb_tab32[] = {
0x00000000, 0xffffffff
};
/*
 * 32-bit framebuffer accessors used by all blit routines in this file.
 * They simply alias fb_writel()/fb_readl(), which are not defined here
 * (presumably provided by <linux/fb.h> - confirm).
 */
#define FB_WRITEL fb_writel
#define FB_READL fb_readl
/*
 * color_imageblit - blit an image whose depth is not 1
 * @image:       source image; exactly one source byte is consumed per pixel
 *               and the source is read contiguously across lines
 * @p:           framebuffer (bits_per_pixel, line_length, visual and
 *               pseudo_palette are read)
 * @dst1:        address of the 32-bit word that holds the first destination
 *               pixel (cfb_imageblit() rounds it down to a multiple of 4
 *               bytes relative to screen_base)
 * @start_index: bit offset of the first pixel inside that word (0..31)
 * @pitch_index: (line_length % 4) * 8, i.e. by how many bits the start of a
 *               line drifts inside a 32-bit word from one line to the next
 *
 * For TRUECOLOR and DIRECTCOLOR visuals every source byte is looked up in
 * the pseudo palette, otherwise it is used as the pixel value directly.
 * Pixels are packed into 32-bit words which are written with FB_WRITEL();
 * partially covered words at the start and end of a line are merged with the
 * existing framebuffer contents via read-modify-write.
 */
static inline void color_imageblit(const struct fb_image *image,
				   struct fb_info *p, u8 __iomem *dst1,
				   u32 start_index,
				   u32 pitch_index)
{
	/* Draw the penguin */
	u32 __iomem *dst;
	u8 __iomem *dst2;
	u32 color = 0, val, shift;
	int i, n, bpp = p->var.bits_per_pixel;
	u32 null_bits = 32 - bpp;
	u32 *palette = (u32 *) p->pseudo_palette;
	const u8 *src = image->data;
	u32 bswapmask = fb_compute_bswapmask(p);

	/*
	 * dst2 follows the byte that contains the first pixel of the current
	 * line. It has to be a byte pointer because line_length is a byte
	 * count (a u32 pointer would advance four times too far), and it has
	 * to include the byte part of start_index so that rounding it down
	 * picks the correct word once the bit offset wraps past 32.
	 * It is only consulted when pitch_index is non-zero.
	 */
	dst2 = dst1 + (start_index >> 3);

	for (i = image->height; i--; ) {
		n = image->width;
		dst = (u32 __iomem *) dst1;
		shift = 0;
		val = 0;

		/* keep the bits in front of the first pixel of this line */
		if (start_index) {
			u32 start_mask = ~fb_shifted_pixels_mask_u32(p,
						start_index, bswapmask);
			val = FB_READL(dst) & start_mask;
			shift = start_index;
		}
		while (n--) {
			if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
			    p->fix.visual == FB_VISUAL_DIRECTCOLOR )
				color = palette[*src];
			else
				color = *src;
			color <<= FB_LEFT_POS(p, bpp);
			val |= FB_SHIFT_HIGH(p, color, shift ^ bswapmask);
			/* word complete: flush it, carry spilled bits over */
			if (shift >= null_bits) {
				FB_WRITEL(val, dst++);

				val = (shift == null_bits) ? 0 :
					FB_SHIFT_LOW(p, color, 32 - shift);
			}
			shift += bpp;
			shift &= (32 - 1);
			src++;
		}
		/* keep the bits behind the last pixel of this line */
		if (shift) {
			u32 end_mask = fb_shifted_pixels_mask_u32(p, shift,
						bswapmask);

			FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
		}
		dst1 += p->fix.line_length;
		if (pitch_index) {
			/* line_length is not a multiple of 4: re-align */
			dst2 += p->fix.line_length;
			dst1 = (u8 __iomem *)((long __force)dst2 & ~(sizeof(u32) - 1));

			start_index += pitch_index;
			start_index &= 32 - 1;
		}
	}
}
/*
 * slow_imageblit - generic monochrome color expansion, one pixel at a time
 * @image:       1 bit per pixel source bitmap; bits are consumed starting
 *               with the most significant bit of each byte and every source
 *               line starts on a byte boundary ((width + 7) / 8 bytes/line)
 * @p:           framebuffer (bits_per_pixel and line_length are read)
 * @dst1:        address of the 32-bit word that holds the first destination
 *               pixel (cfb_imageblit() rounds it down to a multiple of 4
 *               bytes relative to screen_base)
 * @fgcolor:     pixel value written for set source bits
 * @bgcolor:     pixel value written for cleared source bits
 * @start_index: bit offset of the first pixel inside that word (0..31)
 * @pitch_index: (line_length % 4) * 8, i.e. by how many bits the start of a
 *               line drifts inside a 32-bit word from one line to the next
 *
 * Pixels are packed into 32-bit words which are written with FB_WRITEL();
 * partially covered words at the start and end of a line are merged with the
 * existing framebuffer contents via read-modify-write.
 */
static inline void slow_imageblit(const struct fb_image *image, struct fb_info *p,
				  u8 __iomem *dst1, u32 fgcolor,
				  u32 bgcolor,
				  u32 start_index,
				  u32 pitch_index)
{
	u32 shift, color = 0, bpp = p->var.bits_per_pixel;
	u32 __iomem *dst;
	u8 __iomem *dst2;
	u32 val, pitch = p->fix.line_length;
	u32 null_bits = 32 - bpp;
	u32 spitch = (image->width+7)/8;
	const u8 *src = image->data, *s;
	u32 i, j, l;
	u32 bswapmask = fb_compute_bswapmask(p);

	/*
	 * dst2 follows the byte that contains the first pixel of the current
	 * line. It has to be a byte pointer because the pitch is a byte
	 * count (a u32 pointer would advance four times too far), and it has
	 * to include the byte part of start_index so that rounding it down
	 * picks the correct word once the bit offset wraps past 32.
	 * It is only consulted when pitch_index is non-zero.
	 */
	dst2 = dst1 + (start_index >> 3);
	fgcolor <<= FB_LEFT_POS(p, bpp);
	bgcolor <<= FB_LEFT_POS(p, bpp);

	for (i = image->height; i--; ) {
		shift = val = 0;
		l = 8;
		j = image->width;
		dst = (u32 __iomem *) dst1;
		s = src;

		/* write leading bits */
		if (start_index) {
			u32 start_mask = ~fb_shifted_pixels_mask_u32(p,
						start_index, bswapmask);
			val = FB_READL(dst) & start_mask;
			shift = start_index;
		}

		while (j--) {
			/* l counts down from bit 7 to bit 0 of *s */
			l--;
			color = (*s & (1 << l)) ? fgcolor : bgcolor;
			val |= FB_SHIFT_HIGH(p, color, shift ^ bswapmask);

			/* Did the bitshift spill bits to the next long? */
			if (shift >= null_bits) {
				FB_WRITEL(val, dst++);
				val = (shift == null_bits) ? 0 :
					FB_SHIFT_LOW(p, color, 32 - shift);
			}
			shift += bpp;
			shift &= (32 - 1);
			if (!l) { l = 8; s++; }
		}

		/* write trailing bits */
		if (shift) {
			u32 end_mask = fb_shifted_pixels_mask_u32(p, shift,
						bswapmask);

			FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
		}

		dst1 += pitch;
		src += spitch;
		if (pitch_index) {
			/* pitch is not a multiple of 4: re-align */
			dst2 += pitch;
			dst1 = (u8 __iomem *)((long __force)dst2 & ~(sizeof(u32) - 1));

			start_index += pitch_index;
			start_index &= 32 - 1;
		}
	}
}
/*
 * fast_imageblit - optimized monochrome color expansion
 *
 * Expands a 1 bit per pixel bitmap by writing whole 32-bit words that are
 * looked up in a small table of precomputed foreground/background
 * combinations.
 *
 * Only usable if:
 *   - bits_per_pixel is 8, 16 or 32 (any other depth returns immediately);
 *   - image->width is divisible by the number of pixels per dword (ppw);
 *   - fix->line_length is divisible by 4;
 *   - the beginning and the end of each scanline are dword aligned.
 *
 * Source bits are consumed starting with the most significant bit of each
 * byte, and every source line starts on a byte boundary.
 */
static inline void fast_imageblit(const struct fb_image *image, struct fb_info *p,
				  u8 __iomem *dst1, u32 fgcolor,
				  u32 bgcolor)
{
	u32 bpp = p->var.bits_per_pixel;
	u32 ppw = 32/bpp;
	u32 src_pitch = (image->width + 7)/8;
	u32 fg_word = fgcolor, bg_word = bgcolor;
	u32 bit_mask, diff, shift;
	const char *row = image->data, *src;
	u32 __iomem *dst;
	const u32 *mask_tab = NULL;
	size_t ntab, n;
	u32 colortab[16];
	int i, todo, words;

	/* Pick the pixel mask table that matches depth and byte order. */
	if (bpp == 8) {
		mask_tab = fb_be_math(p) ? cfb_tab8_be : cfb_tab8_le;
		ntab = 16;
	} else if (bpp == 16) {
		mask_tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le;
		ntab = 4;
	} else if (bpp == 32) {
		mask_tab = cfb_tab32;
		ntab = 2;
	} else {
		return;
	}

	/* Replicate both colors into every pixel slot of a 32-bit word. */
	for (i = 1; i < (int)ppw; i++) {
		fg_word = (fg_word << bpp) | fgcolor;
		bg_word = (bg_word << bpp) | bgcolor;
	}

	bit_mask = (1 << ppw) - 1;
	diff = fg_word ^ bg_word;
	words = image->width/ppw;

	/*
	 * colortab[n] is the finished word for source bit pattern n:
	 * foreground where the mask is set, background elsewhere.
	 */
	for (n = 0; n < ntab; n++)
		colortab[n] = (mask_tab[n] & diff) ^ bg_word;

	i = image->height;
	while (i--) {
		dst = (u32 __iomem *)dst1;
		shift = 8;
		src = row;
		todo = words;

		/*
		 * Hand-unrolled copy of all completely used source bytes;
		 * each iteration consumes one source byte.
		 */
		switch (ppw) {
		case 4: /* 8 bpp */
			while (todo >= 2) {
				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
				todo -= 2;
				++src;
			}
			break;
		case 2: /* 16 bpp */
			while (todo >= 4) {
				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
				todo -= 4;
				++src;
			}
			break;
		case 1: /* 32 bpp */
			while (todo >= 8) {
				FB_WRITEL(colortab[(*src >> 7) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 6) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 5) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 4) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 3) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 2) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 1) & bit_mask], dst++);
				FB_WRITEL(colortab[(*src >> 0) & bit_mask], dst++);
				todo -= 8;
				++src;
			}
			break;
		}

		/*
		 * Widths that are not a multiple of 8 leave some pixels in
		 * a partially used last source byte; emit those one word
		 * at a time.
		 */
		while (todo--) {
			shift -= ppw;
			FB_WRITEL(colortab[(*src >> shift) & bit_mask], dst++);
			if (!shift) {
				shift = 8;
				++src;
			}
		}

		dst1 += p->fix.line_length;
		row += src_pitch;
	}
}
void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
{
u32 fgcolor, bgcolor, start_index, bitstart, pitch_index = 0;
u32 bpl = sizeof(u32), bpp = p->var.bits_per_pixel;
u32 width = image->width;
u32 dx = image->dx, dy = image->dy;
u8 __iomem *dst1;
if (p->state != FBINFO_STATE_RUNNING)
return;
bitstart = (dy * p->fix.line_length * 8) + (dx * bpp);
start_index = bitstart & (32 - 1);
pitch_index = (p->fix.line_length & (bpl - 1)) * 8;
bitstart /= 8;
bitstart &= ~(bpl - 1);
dst1 = p->screen_base + bitstart;
if (p->fbops->fb_sync)
p->fbops->fb_sync(p);
if (image->depth == 1) {
if (p->fix.visual == FB_VISUAL_TRUECOLOR ||
p->fix.visual == FB_VISUAL_DIRECTCOLOR) {
fgcolor = ((u32*)(p->pseudo_palette))[image->fg_color];
bgcolor = ((u32*)(p->pseudo_palette))[image->bg_color];
} else {
fgcolor = image->fg_color;
bgcolor = image->bg_color;
}
if (32 % bpp == 0 && !start_index && !pitch_index &&
((width & (32/bpp-1)) == 0) &&
bpp >= 8 && bpp <= 32)
fast_imageblit(image, p, dst1, fgcolor, bgcolor);
else
slow_imageblit(image, p, dst1, fgcolor, bgcolor,
start_index, pitch_index);
} else
color_imageblit(image, p, dst1, start_index, pitch_index);
}
/* Export cfb_imageblit() so that code outside this file can use it. */
EXPORT_SYMBOL(cfb_imageblit);
/* Module author, description and license declarations. */
MODULE_AUTHOR("James Simmons <jsimmons@users.sf.net>");
MODULE_DESCRIPTION("Generic software accelerated imaging drawing");
MODULE_LICENSE("GPL");