platform support, more upscaling basics

This commit is contained in:
kub 2021-08-20 19:55:36 +02:00
parent e1bdcf531a
commit ff42e515ae
2 changed files with 353 additions and 73 deletions

View file

@ -6,15 +6,17 @@
* scaler types:
* nn: nearest neighbour
* snn: "smoothed" nearest neighbour (see below)
* bln: bilinear (using only 0.25, 0.5, 0.75 as weight for better performance)
* bln: n-level-bilinear with n quantized weights
* quantization: 0: a<1/(2*n), 1/n: 1/(2*n)<=a<3/(2*n), etc
* currently n=2, n=4 are implemented (there's n=8 mixing, but no filters)
* [NB this has been brought to my attention, and is probably the same as bl2:
* https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1]
*
* "smoothed" nearest neighbour: uses the average of the source pixels if no
* source pixel covers more than 65% of the result pixel. It definitely
* looks better than nearest neighbour and is still quite fast. It creates
* a sharper look than a bilinear filter, at the price of some visible jags
* on diagonal edges.
* [NB this has been brought to my attention, and is probably very similar:
* https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1]
*
* scaling modes:
* 256x___ -> 320x___ only horizontal scaling. Produces an aspect error of
@ -56,12 +58,21 @@ void upscale_rgb_snn_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si,
}
}
void upscale_rgb_bln_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal)
void upscale_rgb_bl2_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal)
{
int y;
for (y = 0; y < height; y++) {
h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal);
h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal);
}
}
/*
 * 256 -> 320 horizontal-only upscale, "bl4" flavour.
 *
 * Invokes h_upscale_bl4_4_5 once for each of `height` lines with a width of
 * 256; a non-positive `height` performs no work.
 *
 * NOTE(review): di and si are never stepped in this function, so the
 * h_upscale_bl4_4_5 macro (defined elsewhere) presumably advances both to the
 * next line itself. `pal` is not referenced directly and is presumably
 * consumed through f_pal - confirm against the macro definitions.
 */
void upscale_rgb_bl4_256_320x___(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int height, u16 *pal)
{
    int remaining;

    for (remaining = height; remaining > 0; remaining--) {
        h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal);
    }
}
@ -140,7 +151,7 @@ void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict
for (j = 0; j < 3; j++) {
h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal);
}
/* lines 3-14 mixing prep */
/* lines 3-11 mixing prep */
di += ds;
for (j = 0; j < 11; j++) {
h_upscale_bln_4_5(di, ds, si, ss, 256, f_pal);
@ -154,7 +165,7 @@ void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict
v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop);
di += ds;
}
/* mixing lines 6-8 */
/* mixing line 6-8 */
for (j = 0; j < 3; j++) {
v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop);
di += ds;
@ -169,6 +180,120 @@ void upscale_rgb_bln_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict
}
}
/*
 * 256x224 -> 320x240 upscale, "bl2" flavour.
 *
 * The picture is handled in 16 blocks of 14 source lines, each producing 15
 * output lines. Per block, output line -> source line(s):
 *   0 1 2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10 11 12 13
 *
 * NOTE(review): the pointer arithmetic below only works out if every
 * h_upscale_bl2_4_5 invocation moves di forward by one output line (ds) and
 * si by one source line; the macro is defined elsewhere - confirm. `pal` is
 * presumably consumed through f_pal.
 */
void upscale_rgb_bl2_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
{
    int blocks, rows;

    for (blocks = 224 / 14; blocks > 0; blocks--) {
        /* first three lines of the block are scaled horizontally only */
        for (rows = 3; rows > 0; rows--) {
            h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal);
        }

        /* leave one output line free, then scale the other 11 source lines */
        di += ds;
        for (rows = 11; rows > 0; rows--) {
            h_upscale_bl2_4_5(di, ds, si, ss, 256, f_pal);
        }

        /* step back to the free line and fill it from its two neighbours */
        di -= 12*ds;
        v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop);

        /* the next seven lines are each mixed in place with the line below */
        for (rows = 7; rows > 0; rows--) {
            di += ds;
            v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop);
        }

        /* skip the last mixed line and the four untouched ones */
        di += 5*ds;
    }
}
/*
 * 256x224 -> 320x240 upscale, "bl4" flavour.
 *
 * The picture is handled in 16 blocks of 14 source lines, each producing 15
 * output lines. Per block, output line -> source line(s):
 *   0 0+1 1+2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11+12 12 13
 * Mixed lines 1-4 use p_025, lines 5-8 use p_05 and lines 9-12 use p_075.
 *
 * NOTE(review): the pointer arithmetic below only works out if every
 * h_upscale_bl4_4_5 invocation moves di forward by one output line (ds) and
 * si by one source line; the macro is defined elsewhere - confirm. `pal` is
 * presumably consumed through f_pal.
 */
void upscale_rgb_bl4_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
{
    int blocks, rows;

    for (blocks = 224 / 14; blocks > 0; blocks--) {
        /* output line 0: horizontally scaled only */
        h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal);

        /* leave output line 1 free, scale the other 13 source lines below it */
        di += ds;
        for (rows = 13; rows > 0; rows--) {
            h_upscale_bl4_4_5(di, ds, si, ss, 256, f_pal);
        }

        /* back to output line 1: built from the line above and the line below */
        di -= 14*ds;
        v_mix(&di[0], &di[-ds], &di[ds], 320, p_025, f_nop);
        di += ds;

        /* output lines 2-4: mixed in place with the following line */
        for (rows = 3; rows > 0; rows--) {
            v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop);
            di += ds;
        }

        /* output lines 5-8 */
        for (rows = 4; rows > 0; rows--) {
            v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop);
            di += ds;
        }

        /* output lines 9-12 */
        for (rows = 4; rows > 0; rows--) {
            v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop);
            di += ds;
        }

        /* output lines 13-14 need no mixing, just step over them */
        di += 2*ds;
    }
}
/*
 * 256x224 -> 320x240 upscale, "bl8" flavour.
 *
 * The picture is handled in 16 blocks of 14 source lines, each producing 15
 * output lines. Per block, output line -> source line(s):
 *   -1+0 0+1 1+2 2+3 3+4 4+5 5+6 6+7 7+8 8+9 9+10 10+11 11+12 12+13 13
 * where "-1" is the line directly above the block. The very first block has
 * no line above it, so it uses the line below (+ds) as the first mix operand
 * instead; every later block uses the line above (-ds).
 * Mix macros per output line: 0-1 p_0125, 2-3 p_025, 4-5 p_0375, 6-7 p_05,
 * 8-9 p_0625, 10-11 p_075, 12-13 p_0875.
 *
 * NOTE(review): the pointer arithmetic below only works out if every
 * h_upscale_bl8_4_5 invocation moves di forward by one output line (ds) and
 * si by one source line; the macro is defined elsewhere - confirm. `pal` is
 * presumably consumed through f_pal.
 */
void upscale_rgb_bl8_256_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
{
    int blocks, rows;
    int first_operand = ds;     /* offset of the first mix operand for output line 0 */

    for (blocks = 224 / 14; blocks > 0; blocks--) {
        /* leave output line 0 free, scale all 14 source lines below it */
        di += ds;
        for (rows = 14; rows > 0; rows--) {
            h_upscale_bl8_4_5(di, ds, si, ss, 256, f_pal);
        }

        /* back to output line 0 */
        di -= 15*ds;
        v_mix(&di[0], &di[first_operand], &di[ds], 320, p_0125, f_nop);
        di += ds;

        /* output line 1: mixed in place with the following line */
        v_mix(&di[0], &di[0], &di[ds], 320, p_0125, f_nop);
        di += ds;

        /* output lines 2-3 */
        for (rows = 2; rows > 0; rows--) {
            v_mix(&di[0], &di[0], &di[ds], 320, p_025, f_nop);
            di += ds;
        }

        /* output lines 4-5 */
        for (rows = 2; rows > 0; rows--) {
            v_mix(&di[0], &di[0], &di[ds], 320, p_0375, f_nop);
            di += ds;
        }

        /* output lines 6-7 */
        for (rows = 2; rows > 0; rows--) {
            v_mix(&di[0], &di[0], &di[ds], 320, p_05, f_nop);
            di += ds;
        }

        /* output lines 8-9 */
        for (rows = 2; rows > 0; rows--) {
            v_mix(&di[0], &di[0], &di[ds], 320, p_0625, f_nop);
            di += ds;
        }

        /* output lines 10-11 */
        for (rows = 2; rows > 0; rows--) {
            v_mix(&di[0], &di[0], &di[ds], 320, p_075, f_nop);
            di += ds;
        }

        /* output lines 12-13 */
        for (rows = 2; rows > 0; rows--) {
            v_mix(&di[0], &di[0], &di[ds], 320, p_0875, f_nop);
            di += ds;
        }

        /* output line 14 needs no mixing, just step over it */
        di += ds;

        /* from the second block on, the line above the block is available */
        first_operand = -ds;
    }
}
/* 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (wrong for PAL) */
void upscale_clut_nn_320x224_240(u8 *__restrict di, int ds, u8 *__restrict si, int ss)
{
@ -230,7 +355,7 @@ void upscale_rgb_snn_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si,
}
}
void upscale_rgb_bln_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
void upscale_rgb_bl2_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
{
int y, j;
@ -238,23 +363,41 @@ void upscale_rgb_bln_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si,
for (j = 0; j < 3; j++) {
h_copy(di, ds, si, ss, 320, f_pal);
}
for (j = 0; j < 3; j++) {
v_mix(&di[0], &si[-ss], &si[0], 320, p_025, f_pal);
di += ds;
si += ss;
}
for (j = 0; j < 3; j++) {
for (j = 0; j < 8; j++) {
v_mix(&di[0], &si[-ss], &si[0], 320, p_05, f_pal);
di += ds;
si += ss;
}
for (j = 0; j < 3; j++) {
si -= ss;
for (j = 0; j < 4; j++) {
h_copy(di, ds, si, ss, 320, f_pal);
}
}
}
void upscale_rgb_bl4_320x224_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
{
int y, j;
for (y = 0; y < 224; y += 14) {
h_copy(di, ds, si, ss, 320, f_pal);
for (j = 0; j < 4; j++) {
v_mix(&di[0], &si[-ss], &si[0], 320, p_025, f_pal);
di += ds;
si += ss;
}
for (j = 0; j < 4; j++) {
v_mix(&di[0], &si[-ss], &si[0], 320, p_05, f_pal);
di += ds;
si += ss;
}
for (j = 0; j < 4; j++) {
v_mix(&di[0], &si[-ss], &si[0], 320, p_075, f_pal);
di += ds;
si += ss;
}
si -= ss;
for (j = 0; j < 3; j++) {
for (j = 0; j < 2; j++) {
h_copy(di, ds, si, ss, 320, f_pal);
}
}
@ -322,19 +465,43 @@ void upscale_rgb_snn_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict
}
}
void upscale_rgb_bln_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
void upscale_rgb_bl2_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
{
int y, j;
/* 3:5, 0 0+1 1 1+2 2 */
for (y = 0; y < 144; y += 3) {
for (j = 0; j < 3; j++) {
h_upscale_nn_1_2(di, ds, si, ss, 160, f_pal);
h_upscale_bl2_1_2(di, ds, si, ss, 160, f_pal);
di += ds;
}
di -= 5*ds;
v_mix(&di[0], &di[-ds], &di[ds], 320, p_075, f_nop);
for (j = 0; j < 2; j++) {
v_mix(&di[0], &di[-ds], &di[ds], 320, p_05, f_nop);
di += 2*ds;
}
}
}
void upscale_rgb_bl4_160_320x144_240(u16 *__restrict di, int ds, u8 *__restrict si, int ss, u16 *pal)
{
int y, j, d;
/* 3:5, -1+0, 0+1 0+1 1+2 2
* for 1st block backwards reference virtually duplicate source line 0 */
for (y = 0, d = 2*ds; y < 144; y += 3, d = -ds) {
di += 2*ds;
v_mix(&di[0], &di[-ds], &di[ds], 320, p_025, f_nop);
for (j = 0; j < 3; j++) {
h_upscale_bl2_1_2(di, ds, si, ss, 160, f_pal);
}
di -= 5*ds;
v_mix(&di[0], &di[d ], &di[2*ds], 320, p_05, f_nop); /*-1+0 */
di += ds;
v_mix(&di[0], &di[ds], &di[2*ds], 320, p_075, f_nop); /* 0+1 */
di += ds;
v_mix(&di[0], &di[ 0], &di[ ds], 320, p_025, f_nop); /* 0+1 */
di += ds;
v_mix(&di[0], &di[ 0], &di[ ds], 320, p_05, f_nop); /* 1+2 */
di += 2*ds;
}
}