mirror of
https://github.com/RaySollium99/picodrive.git
synced 2025-09-04 23:07:46 -04:00
32x DMA memory copy performance optimisation
This commit is contained in:
parent
8141d75694
commit
346153e08e
5 changed files with 105 additions and 15 deletions
|
@ -2261,7 +2261,7 @@ static int emit_get_rom_data(SH2 *sh2, sh2_reg_e r, u32 offs, int size, u32 *val
|
||||||
if (gconst_get(r, &a)) {
|
if (gconst_get(r, &a)) {
|
||||||
a += offs;
|
a += offs;
|
||||||
// check if rom is memory mapped (not bank switched), and address is in rom
|
// check if rom is memory mapped (not bank switched), and address is in rom
|
||||||
if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2)) {
|
if (dr_is_rom(a) && p32x_sh2_get_mem_ptr(a, &mask, sh2) != (void *)-1) {
|
||||||
switch (size & MF_SIZEMASK) {
|
switch (size & MF_SIZEMASK) {
|
||||||
case 0: *val = (s8)p32x_sh2_read8(a, sh2s); break; // 8
|
case 0: *val = (s8)p32x_sh2_read8(a, sh2s); break; // 8
|
||||||
case 1: *val = (s16)p32x_sh2_read16(a, sh2s); break; // 16
|
case 1: *val = (s16)p32x_sh2_read16(a, sh2s); break; // 16
|
||||||
|
@ -4896,12 +4896,7 @@ void sh2_drc_flush_all(void)
|
||||||
|
|
||||||
void sh2_drc_mem_setup(SH2 *sh2)
|
void sh2_drc_mem_setup(SH2 *sh2)
|
||||||
{
|
{
|
||||||
// fill the convenience pointers
|
// fill the DRC-only convenience pointers
|
||||||
sh2->p_bios = sh2->is_slave ? Pico32xMem->sh2_rom_s.w : Pico32xMem->sh2_rom_m.w;
|
|
||||||
sh2->p_da = sh2->data_array;
|
|
||||||
sh2->p_sdram = Pico32xMem->sdram;
|
|
||||||
sh2->p_rom = Pico.rom;
|
|
||||||
// sh2->p_dram filled in dram bank switching
|
|
||||||
sh2->p_drcblk_da = Pico32xMem->drcblk_da[!!sh2->is_slave];
|
sh2->p_drcblk_da = Pico32xMem->drcblk_da[!!sh2->is_slave];
|
||||||
sh2->p_drcblk_ram = Pico32xMem->drcblk_ram;
|
sh2->p_drcblk_ram = Pico32xMem->drcblk_ram;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1855,17 +1855,15 @@ void *p32x_sh2_get_mem_ptr(u32 a, u32 *mask, SH2 *sh2)
|
||||||
{
|
{
|
||||||
const sh2_memmap *mm = sh2->read8_map;
|
const sh2_memmap *mm = sh2->read8_map;
|
||||||
void *ret = (void *)-1;
|
void *ret = (void *)-1;
|
||||||
u32 am;
|
|
||||||
|
|
||||||
mm += a >> SH2_READ_SHIFT;
|
mm += SH2MAP_ADDR2OFFS_R(a);
|
||||||
am = a & ((1 << SH2_READ_SHIFT)-1);
|
if (!map_flag_set(mm->addr)) {
|
||||||
if (!map_flag_set(mm->addr) && !(am & ~mm->mask)) {
|
|
||||||
// directly mapped memory (SDRAM, ROM, data array)
|
// directly mapped memory (SDRAM, ROM, data array)
|
||||||
ret = (void *)(mm->addr << 1);
|
ret = (void *)(mm->addr << 1);
|
||||||
*mask = mm->mask;
|
*mask = mm->mask;
|
||||||
} else if ((a & ~0x7ff) == 0) {
|
} else if ((a & ~0x7ff) == 0) {
|
||||||
// BIOS, has handler function since it shares its segment with I/O
|
// BIOS, has handler function since it shares its segment with I/O
|
||||||
ret = sh2->is_slave ? Pico32xMem->sh2_rom_s.w : Pico32xMem->sh2_rom_m.w;
|
ret = sh2->p_bios;
|
||||||
*mask = 0x7ff;
|
*mask = 0x7ff;
|
||||||
} else if ((a & 0xc6000000) == 0x02000000) {
|
} else if ((a & 0xc6000000) == 0x02000000) {
|
||||||
// banked ROM. Return bank address
|
// banked ROM. Return bank address
|
||||||
|
@ -1877,6 +1875,75 @@ void *p32x_sh2_get_mem_ptr(u32 a, u32 *mask, SH2 *sh2)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Bulk copy between SH2-visible memory areas, used as a fast path for DMA.
//
// dst, src: SH2 bus addresses of destination and source
// count:    number of transfer units
// size:     bytes per transfer unit
// sh2:      CPU context whose memory map is used
//
// Returns count if the whole transfer was done here, or 0 if nothing was
// copied (either address is not plain memory, the arguments are empty, or the
// source run would leave its mapped window). On 0 the caller is expected to
// fall back to unit-by-unit transfers.
//
// The source is read directly through a host pointer; the destination is
// always written through p32x_sh2_write8/16/32. The "^ 1" byte access and the
// halfword swap in the word loop suggest memory is kept as host-order 16 bit
// units on a little endian host -- NOTE(review): confirm before building for
// a big endian host.
int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2)
{
  u32 mask;
  u8 *ps;           // byte pointer, so no arithmetic on void * is needed
  void *pd;
  unsigned long long bytes;
  int len, i;

  // nothing to do; without this check an odd dst would still get bytes
  // written for count <= 0
  if (count <= 0 || size <= 0)
    return 0;

  // check if src and dst points to memory (rom/sdram/dram/da)
  if ((pd = p32x_sh2_get_mem_ptr(dst, &mask, sh2)) == (void *)-1)
    return 0;
  if ((ps = p32x_sh2_get_mem_ptr(src, &mask, sh2)) == (void *)-1)
    return 0;

  // mask now describes the source window. Refuse runs that would continue
  // past its end (the host pointer does not wrap around like the SH2 address
  // does), and keep the byte count representable as a positive int
  bytes = (unsigned long long)count * (unsigned)size;
  if (bytes > 0x7fffffffu || bytes - 1 > mask - (src & mask))
    return 0;

  ps += src & mask;
  len = (int)bytes;

  // DRAM in byte access is always in overwrite mode
  if (pd == sh2->p_dram && size == 1)
    dst |= 0x20000;

  // align dst to halfword
  if (dst & 1) {
    p32x_sh2_write8(dst, *(u8 *)((uptr)ps ^ 1), sh2);
    ps++, dst++, len--;
  }

  // copy data
  if ((uptr)ps & 1) {
    // unaligned, use halfword copy mode to reduce memory bandwidth
    u16 *sp = (u16 *)(ps - 1);
    u16 dl, dh = *sp++;

    for (i = 0; i < (len & ~1); i += 2, dst += 2, sp++) {
      // low byte of the previous halfword followed by high byte of the next
      dl = dh, dh = *sp;
      p32x_sh2_write16(dst, (u16)((dh >> 8) | (dl << 8)), sh2);
    }
    if (len & 1)
      p32x_sh2_write8(dst, dh & 0xff, sh2);
  } else {
    // dst and src at least halfword aligned
    u16 *sp = (u16 *)ps;

    // align dst to word
    if ((dst & 2) && len >= 2) {
      p32x_sh2_write16(dst, *sp++, sh2);
      dst += 2, len -= 2;
    }

    if ((uptr)sp & 2) {
      // halfword copy, using word writes to reduce memory bandwidth
      u16 dl, dh;

      for (i = 0; i < (len & ~3); i += 4, dst += 4, sp += 2) {
        dl = sp[0], dh = sp[1];
        // widen before shifting: a promoted u16 shifted by 16 can overflow int
        p32x_sh2_write32(dst, ((u32)dl << 16) | dh, sh2);
      }
    } else {
      // word copy
      u32 d;

      for (i = 0; i < (len & ~3); i += 4, dst += 4, sp += 2) {
        d = *(u32 *)sp;
        // swap the two halfwords of the host word
        p32x_sh2_write32(dst, (d << 16) | (d >> 16), sh2);
      }
    }

    // remaining halfword and/or byte
    if (len & 2) {
      p32x_sh2_write16(dst, *sp++, sh2);
      dst += 2;
    }
    if (len & 1)
      p32x_sh2_write8(dst, *sp >> 8, sh2);
  }

  return count;
}
|
||||||
|
|
||||||
// -----------------------------------------------------------------
|
// -----------------------------------------------------------------
|
||||||
|
|
||||||
static void z80_md_bank_write_32x(unsigned int a, unsigned char d)
|
static void z80_md_bank_write_32x(unsigned int a, unsigned char d)
|
||||||
|
@ -2107,8 +2174,12 @@ void Pico32xSwapDRAM(int b)
|
||||||
ssh2_read16_map[0x04/2].addr = ssh2_read16_map[0x24/2].addr =
|
ssh2_read16_map[0x04/2].addr = ssh2_read16_map[0x24/2].addr =
|
||||||
ssh2_read32_map[0x04/2].addr = ssh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]);
|
ssh2_read32_map[0x04/2].addr = ssh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]);
|
||||||
|
|
||||||
msh2.p_dram = ssh2.p_dram = Pico32xMem->dram[b]; // DRC conveniance ptr
|
// convenience ptrs
|
||||||
msh2.p_rom = ssh2.p_rom = Pico.rom;
|
msh2.p_sdram = ssh2.p_sdram = Pico32xMem->sdram;
|
||||||
|
msh2.p_dram = ssh2.p_dram = Pico32xMem->dram[b];
|
||||||
|
msh2.p_rom = ssh2.p_rom = Pico.rom;
|
||||||
|
msh2.p_bios = Pico32xMem->sh2_rom_m.w; msh2.p_da = msh2.data_array;
|
||||||
|
ssh2.p_bios = Pico32xMem->sh2_rom_s.w; ssh2.p_da = ssh2.data_array;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void bank_switch_rom_sh2(void)
|
static void bank_switch_rom_sh2(void)
|
||||||
|
|
|
@ -129,6 +129,24 @@ static void dmac_transfer_one(SH2 *sh2, struct dma_chan *chan)
|
||||||
chan->sar += size;
|
chan->sar += size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// optimization for copying around memory with SH2 DMA
|
||||||
|
static void dmac_memcpy(struct dma_chan *chan, SH2 *sh2)
|
||||||
|
{
|
||||||
|
u32 size = (chan->chcr >> 10) & 3, up = chan->chcr & (1 << 14);
|
||||||
|
int count;
|
||||||
|
|
||||||
|
if (!up || chan->tcr < 4)
|
||||||
|
return;
|
||||||
|
if (size == 3) size = 2; // 4-word xfer mode still counts in words
|
||||||
|
// XXX check TCR being a multiple of 4 in 4-word xfer mode?
|
||||||
|
// XXX check alignment of sar/dar, generating a bus error if unaligned?
|
||||||
|
count = p32x_sh2_memcpy(chan->dar, chan->sar, chan->tcr, 1 << size, sh2);
|
||||||
|
|
||||||
|
chan->sar += count << size;
|
||||||
|
chan->dar += count << size;
|
||||||
|
chan->tcr -= count;
|
||||||
|
}
|
||||||
|
|
||||||
// DMA trigger by SH2 register write
|
// DMA trigger by SH2 register write
|
||||||
static void dmac_trigger(SH2 *sh2, struct dma_chan *chan)
|
static void dmac_trigger(SH2 *sh2, struct dma_chan *chan)
|
||||||
{
|
{
|
||||||
|
@ -139,6 +157,11 @@ static void dmac_trigger(SH2 *sh2, struct dma_chan *chan)
|
||||||
if (chan->chcr & DMA_AR) {
|
if (chan->chcr & DMA_AR) {
|
||||||
// auto-request transfer
|
// auto-request transfer
|
||||||
sh2->state |= SH2_STATE_SLEEP;
|
sh2->state |= SH2_STATE_SLEEP;
|
||||||
|
if ((((chan->chcr >> 12) ^ (chan->chcr >> 14)) & 3) == 0 &&
|
||||||
|
(((chan->chcr >> 14) ^ (chan->chcr >> 15)) & 1) == 1) {
|
||||||
|
// SM == DM and either DM0 or DM1 are set. check for mem to mem copy
|
||||||
|
dmac_memcpy(chan, sh2);
|
||||||
|
}
|
||||||
while ((int)chan->tcr > 0)
|
while ((int)chan->tcr > 0)
|
||||||
dmac_transfer_one(sh2, chan);
|
dmac_transfer_one(sh2, chan);
|
||||||
dmac_transfer_complete(sh2, chan);
|
dmac_transfer_complete(sh2, chan);
|
||||||
|
|
|
@ -937,6 +937,7 @@ unsigned int REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, unsigned int d, S
|
||||||
unsigned int REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, unsigned int d, SH2 *sh2);
|
unsigned int REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, unsigned int d, SH2 *sh2);
|
||||||
void *p32x_sh2_get_mem_ptr(unsigned int a, unsigned int *mask, SH2 *sh2);
|
void *p32x_sh2_get_mem_ptr(unsigned int a, unsigned int *mask, SH2 *sh2);
|
||||||
void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles);
|
void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles);
|
||||||
|
int p32x_sh2_memcpy(unsigned int dst, unsigned int src, int count, int size, SH2 *sh2);
|
||||||
|
|
||||||
// 32x/draw.c
|
// 32x/draw.c
|
||||||
void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode);
|
void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode);
|
||||||
|
|
|
@ -89,7 +89,7 @@ get_define OFS_PMEM32x_ Pico32xMem pal_native ; echo "$line" >>$fn
|
||||||
get_define OFS_SH2_ SH2_ is_slave ; echo "$line" >>$fn
|
get_define OFS_SH2_ SH2_ is_slave ; echo "$line" >>$fn
|
||||||
get_define OFS_SH2_ SH2_ p_bios ; echo "$line" >>$fn
|
get_define OFS_SH2_ SH2_ p_bios ; echo "$line" >>$fn
|
||||||
get_define OFS_SH2_ SH2_ p_da ; echo "$line" >>$fn
|
get_define OFS_SH2_ SH2_ p_da ; echo "$line" >>$fn
|
||||||
get_define OFS_SH2_ SH2_ p_sdram ; echo "$line" >>$fn
|
get_define OFS_SH2_ SH2_ p_sdram ; echo "$line" >>$fn
|
||||||
get_define OFS_SH2_ SH2_ p_rom ; echo "$line" >>$fn
|
get_define OFS_SH2_ SH2_ p_rom ; echo "$line" >>$fn
|
||||||
get_define OFS_SH2_ SH2_ p_dram ; echo "$line" >>$fn
|
get_define OFS_SH2_ SH2_ p_dram ; echo "$line" >>$fn
|
||||||
get_define OFS_SH2_ SH2_ p_drcblk_da ; echo "$line" >>$fn
|
get_define OFS_SH2_ SH2_ p_drcblk_da ; echo "$line" >>$fn
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue