sms, add fast renderer, remove 1st column (8 px) if blanked

This commit is contained in:
kub 2022-09-21 23:26:48 +00:00
parent 23e4719638
commit 96948bdfc8
15 changed files with 454 additions and 181 deletions

View file

@ -1646,13 +1646,17 @@ void FinalizeLine555(int sh, int line, struct PicoEState *est)
PicoDrawUpdateHighPal();
if ((PicoIn.AHW & PAHW_SMS) && (Pico.m.hardware & 0x3) == 0x3)
len = 160;
else if (Pico.video.reg[12]&1) len = 320;
else len = 256;
len = 256;
if ((PicoIn.AHW & PAHW_SMS) && (Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD))
len = 160;
else if (!(PicoIn.AHW & PAHW_SMS) && (Pico.video.reg[12]&1))
len = 320;
if ((PicoIn.AHW & PAHW_SMS) && (Pico.video.reg[0] & 0x20) && len == 256)
len -= 8, ps += 8;
if ((*est->PicoOpt & POPT_EN_SOFTSCALE) && len < 320) {
if (len == 256) {
if (len >= 240 && len <= 256) {
pd += (256-len)>>1;
switch (PicoIn.filter) {
case 3: h_upscale_bl4_4_5(pd, 320, ps, 256, len, f_pal); break;
case 2: h_upscale_bl2_4_5(pd, 320, ps, 256, len, f_pal); break;
@ -1706,10 +1710,13 @@ void FinalizeLine8bit(int sh, int line, struct PicoEState *est)
Pico.m.dirtyPal = 2;
}
if ((PicoIn.AHW & PAHW_SMS) && (Pico.m.hardware & 0x3) == 0x3)
len = 160;
else if (Pico.video.reg[12]&1) len = 320;
else len = 256;
len = 256;
if ((PicoIn.AHW & PAHW_SMS) && (Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD))
len = 160;
else if (!(PicoIn.AHW & PAHW_SMS) && (Pico.video.reg[12]&1))
len = 320;
if ((PicoIn.AHW & PAHW_SMS) && (Pico.video.reg[0] & 0x20) && len == 256)
len -= 8, ps += 8;
if (DrawLineDestIncrement == 0)
pd = est->HighCol+8;
@ -1720,7 +1727,7 @@ void FinalizeLine8bit(int sh, int line, struct PicoEState *est)
if (!sh && (est->rendstatus & PDRAW_SONIC_MODE))
pal = est->SonicPalCount*0x40;
// Smoothing can't be used with CLUT, hence it's always Nearest Neighbour.
if (len == 256)
if (len >= 240)
// use reverse version since src and dest ptr may be the same.
rh_upscale_nn_4_5(pd, 320, ps, 256, len, f_or);
else

View file

@ -1651,34 +1651,47 @@ FinalizeLine555:
ldr r4, [r5]
ldr r7, [r5, #OFS_PicoIn_AHW-OFS_PicoIn_opt]
ldrb r12,[r8, #OFS_Pico_video_reg+12]
ldrb r6, [r8, #OFS_Pico_video_reg+0]
ldr r2, [r8, #OFS_Pico_m_hardware]
add r1, r1, #8
tst r7, #0x10
beq .fl_no20colRGB555
tst r7, #0x10 @ SMS ?
beq .fl_noSMS
and r7, r2, #0x3
cmp r7, #0x3 @ Game Gear, LCD?
bne .fl_no20colRGB555
beq .fl_gg20col
tst r6, #0x20
movne r2, #248/8 @ len = 248
addne r1, r1, #8 @ ps += 8
moveq r2, #256/8 @ len = 256
b .fl_check32scaling
.fl_gg20col:
mov r2, #160/8 @ len = 160
tst r4, #0x4000 @ EN_SOFTSCALE?
bne .fl_20scale_RGB555 @ scale 160->320
beq .fl_checkborder
b .fl_checkborder
.fl_no20colRGB555:
.fl_noSMS:
tst r12, #1 @ h32?
movne r2, #320/8 @ len = 320
bne .fl_no32colRGB555
moveq r2, #256/8 @ len = 256
bne .fl_40colRGB555
mov r2, #256/8 @ len = 256
.fl_check32scaling:
tst r4, #0x4000 @ EN_SOFTSCALE?
rsbne r7, r2, #256/8
addne r0, r0, r7, lsl #3 @ pd += (256-len)>>1
bne .fl_32scale_RGB555 @ scale 256->320
.fl_checkborder:
tst r4, #0x0100 @ DIS_32C_BORDER?
rsbeq r4, r2, #320/8 @ pd += (320-len)/2
addeq r0, r0, r4, lsl #3
rsbeq r7, r2, #320/8 @ pd += (320-len)/2
addeq r0, r0, r7, lsl #3
.fl_no32colRGB555:
.fl_40colRGB555:
#ifdef UNALIGNED_DRAWLINEDEST
@ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer
tst r0, #2
@ -2250,24 +2263,51 @@ blockcpy:
cmp r0, r1
bhs blockcpyhi
mov r2, r2, lsr #4
subs r2, r2, #16
blt blockcpy2
blockcpy_loop:
ldmia r1!, {r3-r5,r12}
subs r2, r2, #1
subs r2, r2, #16
stmia r0!, {r3-r5,r12}
bne blockcpy_loop
bge blockcpy_loop
blockcpy2:
adds r2, r2, #16-4
ldmltfd sp!, {r4,r5}
bxlt lr
blockcpy_loop2:
ldr r3, [r1], #4
subs r2, r2, #4
str r3, [r0], #4
bge blockcpy_loop2
ldmfd sp!, {r4,r5}
bx lr
blockcpyhi:
add r0, r0, r2
add r1, r1, r2
mov r2, r2, lsr #4
subs r2, r2, #16
blt blockcpyhi2
blockcpyhi_loop:
ldmdb r1!, {r3-r5,r12}
subs r2, r2, #1
subs r2, r2, #16
stmdb r0!, {r3-r5,r12}
bne blockcpyhi_loop
bge blockcpyhi_loop
blockcpyhi2:
adds r2, r2, #16-4
ldmltfd sp!, {r4,r5}
bxlt lr
blockcpyhi_loop2:
ldr r3, [r1, #-4]!
subs r2, r2, #4
str r3, [r0, #-4]!
bge blockcpyhi_loop2
ldmfd sp!, {r4,r5}
bx lr
@ -2281,32 +2321,61 @@ blockcpy_or:
cmp r0, r1
bhs blockcpyhi_or
mov r2, r2, lsr #4
subs r2, r2, #16
blt blockcpy_or2
blockcpy_loop_or:
ldmia r1!, {r4-r6,r12}
subs r2, r2, #1
subs r2, r2, #16
orr r4, r4, r3
orr r5, r5, r3
orr r6, r6, r3
orr r12,r12,r3
stmia r0!, {r4-r6,r12}
bne blockcpy_loop_or
bge blockcpy_loop_or
blockcpy_or2:
adds r2, r2, #16-4
ldmltfd sp!, {r4-r6}
bxlt lr
blockcpy_loop_or2:
ldr r4, [r1], #4
subs r2, r2, #4
orr r4, r4, r3
str r4, [r0], #4
bge blockcpy_loop_or2
ldmfd sp!, {r4-r6}
bx lr
blockcpyhi_or:
add r0, r0, r2
add r1, r1, r2
mov r2, r2, lsr #4
subs r2, r2, #16
blt blockcpyhi_or2
blockcpyhi_loop_or:
ldmdb r1!, {r4-r6,r12}
subs r2, r2, #1
subs r2, r2, #16
orr r4, r4, r3
orr r5, r5, r3
orr r6, r6, r3
orr r12,r12,r3
stmdb r0!, {r4-r6,r12}
bne blockcpyhi_loop_or
bge blockcpyhi_loop_or
blockcpyhi_or2:
adds r2, r2, #16-4
ldmltfd sp!, {r4-r6}
bxlt lr
blockcpyhi_loop_or2:
ldr r4, [r1, #-4]!
subs r2, r2, #4
orr r4, r4, r3
str r4, [r0, #-4]!
bge blockcpyhi_loop_or2
ldmfd sp!, {r4-r6}
bx lr

View file

@ -154,15 +154,14 @@ static void ParseSpritesM4(int scanline)
{
struct PicoVideo *pv = &Pico.video;
u8 *sat;
int xoff = 8; // relative to HighCol, which is (screen - 8)
int xoff = line_offset;
int sprite_base, addr_mask;
int zoomed = pv->reg[1] & 0x1; // zoomed sprites, e.g. Earthworm Jim
unsigned int pack;
int i, s, h, m;
if (pv->reg[0] & 8)
xoff = 0;
xoff += line_offset;
xoff -= 8; // sprite shift
if ((Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD))
xoff -= 48; // GG LCD, adjust to center 160 px
@ -204,6 +203,9 @@ static void ParseSpritesM4(int scanline)
// make sprite pixel map by merging the 4 bitplanes
pack = ((pack | (pack>>16)) | ((pack | (pack>>16))>>8)) & 0xff;
if (!m) m = CollisionDetect(sprites_map, sprites_x[s], pack, zoomed);
// no collision detection in 1st column if it's masked
if (pv->reg[0] & 0x20)
sprites_map[1] = 0;
}
s++;
}
@ -290,28 +292,26 @@ static void DrawDisplayM4(int scanline)
if (scanline < 16 && (pv->reg[0] & 0x40))
dx = 0; // hscroll disabled for top 2 rows (e.g. Fantasy Zone II)
tilex = ((-dx >> 3) + cellskip) & 0x1f;
tilex = (32 - (dx >> 3) + cellskip) & 0x1f;
ty = (line & 7) << 1; // Y-Offset into tile
cells = maxcells - cellskip;
dx = ((dx - 1) & 7) + 1;
if (dx != 8)
cells++; // have hscroll, need to draw 1 cell more
dx = (dx & 7);
dx += cellskip << 3;
dx += line_offset;
// tiles
if (!(pv->debug_p & PVD_KILL_B)) {
if ((Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD)) {
// on GG render only the center 160 px
DrawStripM4(nametab , dx | ((cells-12)<< 16),(tilex+6) | (ty << 16));
// on GG render only the center 160 px, but mind hscroll
DrawStripM4(nametab , (dx-8) | ((cells-11)<< 16),(tilex+5) | (ty << 16));
} else if (pv->reg[0] & 0x80) {
// vscroll disabled for rightmost 8 columns (e.g. Gauntlet)
int dx2 = dx + (cells-8)*8, tilex2 = tilex + (cells-8), ty2 = scanline&7;
DrawStripM4(nametab, dx | ((cells-8) << 16), tilex | (ty << 16));
DrawStripM4(nametab2, dx2 | (8 << 16), tilex2 | (ty2 << 17));
DrawStripM4(nametab, dx | ((cells-8) << 16), tilex | (ty << 16));
DrawStripM4(nametab2, dx2 | (8 << 16), tilex2 | (ty2 << 17));
} else
DrawStripM4(nametab , dx | ( cells << 16), tilex | (ty << 16));
DrawStripM4(nametab , dx | ( cells << 16), tilex | (ty << 16));
}
// sprites
@ -320,9 +320,9 @@ static void DrawDisplayM4(int scanline)
if ((pv->reg[0] & 0x20) && (Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) != (PMS_HW_GG|PMS_HW_LCD)) {
// first column masked with background, calculate offset to start of line
dx = (dx&~0x1f) / 4;
dx = line_offset / 4;
ty = ((pv->reg[7]&0x0f)|0x10) * 0x01010101;
((u32 *)Pico.est.HighCol)[dx+2] = ((u32 *)Pico.est.HighCol)[dx+3] = ty;
((u32 *)Pico.est.HighCol)[dx] = ((u32 *)Pico.est.HighCol)[dx+1] = ty;
}
}
@ -432,12 +432,12 @@ static void ParseSpritesTMS(int scanline)
struct PicoVideo *pv = &Pico.video;
unsigned int pack;
u8 *sat;
int xoff = 8; // relative to HighCol, which is (screen - 8)
int xoff;
int sprite_base, addr_mask;
int zoomed = pv->reg[1] & 0x1; // zoomed sprites
int i, s, h, m;
xoff += line_offset;
xoff = line_offset;
sat = (u8 *)PicoMem.vramb + ((pv->reg[5] & 0x7e) << 7);
if (pv->reg[1] & 2) {
@ -561,7 +561,7 @@ static void DrawDisplayM1(int scanline)
tilex = cellskip & 0x1f;
cells = maxcells - cellskip;
dx = (cellskip << 3) + line_offset + 8;
dx = (cellskip << 3) + line_offset;
// tiles
if (!(pv->debug_p & PVD_KILL_B))
@ -607,7 +607,7 @@ static void DrawDisplayM2(int scanline)
tilex = cellskip & 0x1f;
cells = maxcells - cellskip;
dx = (cellskip << 3) + line_offset + 8;
dx = (cellskip << 3) + line_offset;
// tiles
if (!(pv->debug_p & PVD_KILL_B))
@ -658,7 +658,7 @@ static void DrawDisplayM3(int scanline)
tilex = cellskip & 0x1f;
cells = maxcells - cellskip;
dx = (cellskip << 3) + line_offset + 8;
dx = (cellskip << 3) + line_offset;
// tiles
if (!(pv->debug_p & PVD_KILL_B))
@ -708,7 +708,7 @@ static void DrawDisplayM0(int scanline)
tilex = cellskip & 0x1f;
cells = maxcells - cellskip;
dx = (cellskip << 3) + line_offset + 8;
dx = (cellskip << 3) + line_offset;
// tiles
if (!(pv->debug_p & PVD_KILL_B))
@ -729,6 +729,7 @@ static void FinalizeLine8bitSMS(int line);
void PicoFrameStartSMS(void)
{
int lines = 192, columns = 256, loffs, coffs;
skip_next_line = 0;
loffs = screen_offset = 24; // 192 lines is really 224 with top/bottom bars
Pico.est.rendstatus = PDRAW_32_COLS;
@ -741,9 +742,14 @@ void PicoFrameStartSMS(void)
// Copy LCD enable flag for easier handling
Pico.m.hardware &= ~PMS_HW_LCD;
if (PicoIn.opt & POPT_EN_GG_LCD)
if ((PicoIn.opt & POPT_EN_GG_LCD) && (Pico.m.hardware & PMS_HW_GG))
Pico.m.hardware |= PMS_HW_LCD;
if (!(Pico.m.hardware & PMS_HW_LCD) && (mode & 4) && (Pico.video.reg[0] & 0x20)) {
// SMS mode 4 with 1st column blanked
columns = 248;
Pico.est.rendstatus |= PDRAW_SMS_BLANK_1;
}
if ((Pico.m.hardware & (PMS_HW_GG|PMS_HW_LCD)) == (PMS_HW_GG|PMS_HW_LCD)) {
// GG LCD always has 160x144 regardless of settings
screen_offset = 24; // nonetheless the vdp timing has 224 lines
@ -761,15 +767,20 @@ void PicoFrameStartSMS(void)
lines = 224;
break;
}
if (PicoIn.opt & POPT_EN_SOFTSCALE) {
line_offset = 8; // FinalizeLine requires HighCol+8
// ugh... nonetheless has offset in 8-bit fast mode if 1st col blanked!
coffs = (FinalizeLineSMS == NULL && columns == 248 ? 8 : 0);
if (FinalizeLineSMS != NULL && (PicoIn.opt & POPT_EN_SOFTSCALE)) {
// softscaling always generates 320px, but no scaling in 8bit fast
coffs = 0;
columns = 320;
} else
coffs = PicoIn.opt & POPT_DIS_32C_BORDER ? 0:(320-columns)/2;
line_offset = (FinalizeLineSMS == NULL ? coffs : 0);
if (FinalizeLineSMS == FinalizeLineRGB555SMS)
line_offset = 0 /* done in FinalizeLine */;
} else if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) {
line_offset -= coffs;
coffs = (320-columns) / 2;
if (FinalizeLineSMS == NULL)
line_offset += coffs; // ... else centering done in FinalizeLine
}
if (Pico.est.rendstatus != rendstatus_old || lines != rendlines) {
emu_video_mode_change(loffs, lines, coffs, columns);
@ -836,13 +847,16 @@ norender:
/* Palette for TMS9918 mode, see https://www.smspower.org/Development/Palette */
// RGB values: #000000 #000000 #21c842 #5edc78 #5455ed #7d76fc #d4524d #42ebf5
// #fc5554 #ff7978 #d4c154 #e6ce80 #21b03b #c95bba #cccccc #ffffff
static u16 tmspal[32] = {
static u16 tmspal[] = {
// SMS palette
0x0000, 0x0000, 0x00a0, 0x00f0, 0x0a00, 0x0f00, 0x0005, 0x0ff0,
0x000a, 0x000f, 0x00aa, 0x00ff, 0x0050, 0x0f0f, 0x0aaa, 0x0fff,
// GG palette
// TMS palette
0x0000, 0x0000, 0x04c2, 0x07d6, 0x0e55, 0x0f77, 0x055c, 0x0ee4,
0x055f, 0x077f, 0x05bc, 0x08ce, 0x03a2, 0x0b5c, 0x0ccc, 0x0fff,
// SMS palette, closer to the TMS one
0x0000, 0x0000, 0x05f0, 0x05f5, 0x0a50, 0x0f55, 0x055a, 0x0ff0,
0x055f, 0x0aaf, 0x05aa, 0x05af, 0x00a0, 0x0f5f, 0x0aaa, 0x0fff,
};
void PicoDoHighPal555SMS(void)
@ -853,11 +867,11 @@ void PicoDoHighPal555SMS(void)
unsigned int t;
int i, j;
if (FinalizeLineSMS != FinalizeLine8bitSMS || Pico.m.dirtyPal == 2)
if (FinalizeLineSMS == FinalizeLineRGB555SMS || Pico.m.dirtyPal == 2)
Pico.m.dirtyPal = 0;
// use hardware palette for 16bit accurate mode
if (FinalizeLineSMS == FinalizeLineRGB555SMS)
// use hardware palette if not in 8bit accurate mode
if (FinalizeLineSMS != FinalizeLine8bitSMS)
spal = (void *)PicoMem.cram;
/* SMS 6 bit cram data was already converted to MD/GG format by vdp write,
@ -906,8 +920,7 @@ void PicoDrawSetOutputSMS(pdso_t which)
{
case PDF_8BIT: FinalizeLineSMS = FinalizeLine8bitSMS; break;
case PDF_RGB555: FinalizeLineSMS = FinalizeLineRGB555SMS; break;
// there's no fast renderer yet, just treat it like PDF_8BIT
default: FinalizeLineSMS = FinalizeLine8bitSMS;
default: FinalizeLineSMS = NULL; // no multiple palettes, no scaling
PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328); break;
}
rendstatus_old = -1;

View file

@ -227,6 +227,7 @@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est);
#define PDRAW_SKIP_FRAME (1<<10) // frame is skipped
#define PDRAW_30_ROWS (1<<11) // 30 rows mode (240 lines)
#define PDRAW_32X_SCALE (1<<12) // scale CLUT layer for 32X
#define PDRAW_SMS_BLANK_1 (1<<13) // 1st column blanked
extern int rendstatus_old;
extern int rendlines;

View file

@ -461,7 +461,7 @@ static void write_bank_xor(unsigned short a, unsigned char d)
if ((a&0x6000) != 0x2000) return;
if (Pico.ms.mapper != PMS_MAP_XOR && Pico.ms.mapper) return;
elprintf(EL_Z80BNK, "bank 32k %04x %02x @ %04x", a, d, z80_pc());
elprintf(EL_Z80BNK, "bank xor %04x %02x @ %04x", a, d, z80_pc());
Pico.ms.mapper = PMS_MAP_XOR;
Pico.ms.carthw[0] = d;
@ -544,7 +544,7 @@ static void xwrite(unsigned int a, unsigned char d)
}
}
// TMR product codes and hardware type for know 50Hz-only games
// TMR product codes and hardware type for known 50Hz-only games
static u32 region_pal[] = { // cf. GX+, core/cart_hw/sms_cartc.c
0x40207067 /* Addams Family */, 0x40207020 /* Back.Future 3 */,
0x40207058 /* Battlemaniacs */, 0x40007105 /* Cal.Games 2 */,

View file

@ -46,6 +46,10 @@
#include "../pico_types.h"
#include "resampler.h"
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
static double besseli0(double x)
{
unsigned i;