vdp fifo speed optimization

This commit is contained in:
kub 2020-03-14 19:52:27 +01:00
parent 20fafa7127
commit c55a44a88c
5 changed files with 210 additions and 125 deletions

View file

@ -48,6 +48,135 @@ const unsigned char hcounts_32[] = {
0x82,0x83,0x83,0x84,0x85,0x85,0x86,0x87,0x87,0x88,0x89,0x8a,0x8a,0x8b,0x8c,0x8c,
};
// VDP transfer slots for blanked and active display in 32col and 40col mode.
// 1 slot is 488/171 = 2.8538 68k cycles in h32, and 488/210 = 2.3238 in h40.
// In blanked display, all slots but 5 (h32) / 6 (h40) are usable for transfers;
// in active display only 16 (h32) / 18 (h40) slots can be used.
// XXX the inactive (blanked) tables are computed as slot# = cycles*maxslot#/488;
// they should be derived from the hv tables instead.
// VDP transfer slots in inactive (blanked) display 32col mode.
// refresh slots: 250, 26, 58, 90, 122 -> 32, 64, 96, 128, 160
// Lookup table with 256 entries (16 rows of 16). The index is the 68k cycle
// count divided by 2, the value is the slot #. Values run from 0 (first
// entry) to 173 (last entry).
const unsigned char vdpcyc2sl_32_bl[] = { // 68k cycles/2 to slot #
// 68k cycle count for each column of the first row:
// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
0, 0, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 10,
10, 11, 12, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21,
21, 22, 23, 23, 24, 25, 25, 26, 27, 27, 28, 29, 29, 30, 31, 31,
32, 33, 34, 34, 35, 36, 36, 37, 38, 38, 39, 40, 40, 41, 42, 42,
43, 44, 44, 45, 46, 46, 47, 48, 48, 49, 50, 51, 51, 52, 53, 53,
54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 61, 61, 62, 63, 63, 64,
65, 65, 66, 67, 68, 68, 69, 70, 70, 71, 72, 72, 73, 74, 74, 75,
76, 76, 77, 78, 78, 79, 80, 80, 81, 82, 83, 83, 84, 85, 85, 86,
87, 87, 88, 89, 89, 90, 91, 91, 92, 93, 93, 94, 95, 95, 96, 97,
97, 98, 99,100,100,101,102,102,103,104,104,105,106,106,107,108,
108,109,110,110,111,112,112,113,114,114,115,116,117,117,118,119,
119,120,121,121,122,123,123,124,125,125,126,127,127,128,129,129,
130,131,131,132,133,134,134,135,136,136,137,138,138,139,140,140,
141,142,142,143,144,144,145,146,146,147,148,148,149,150,151,151,
152,153,153,154,155,155,156,157,157,158,159,159,160,161,161,162,
163,163,164,165,166,166,167,168,168,169,170,170,171,172,172,173,
};
// VDP transfer slots in inactive (blanked) display 40col mode.
// refresh slots: 250, 26, 58, 90, 122, 154 -> 40, 72, 104, 136, 168, 200
// Lookup table with 256 entries (16 rows of 16). The index is the 68k cycle
// count divided by 2, the value is the slot #. Values run from 0 (first
// entry) to 213 (last entry).
const unsigned char vdpcyc2sl_40_bl[] = { // 68k cycles/2 to slot #
// 68k cycle count for each column of the first row:
// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
0, 0, 1, 2, 3, 4, 5, 5, 6, 7, 8, 9, 10, 10, 11, 12,
13, 14, 15, 15, 16, 17, 18, 19, 20, 20, 21, 22, 23, 24, 25, 25,
26, 27, 28, 29, 30, 30, 31, 32, 33, 34, 35, 35, 36, 37, 38, 39,
40, 40, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 51, 52,
53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 61, 62, 63, 64, 65, 66,
66, 67, 68, 69, 70, 71, 71, 72, 73, 74, 75, 76, 76, 77, 78, 79,
80, 81, 81, 82, 83, 84, 85, 86, 86, 87, 88, 89, 90, 91, 91, 92,
93, 94, 95, 96, 96, 97, 98, 99,100,101,102,102,103,104,105,106,
107,107,108,109,110,111,112,112,113,114,115,116,117,117,118,119,
120,121,122,122,123,124,125,126,127,127,128,129,130,131,132,132,
133,134,135,136,137,137,138,139,140,141,142,142,143,144,145,146,
147,147,148,149,150,151,152,153,153,154,155,156,157,158,158,159,
160,161,162,163,163,164,165,166,167,168,168,169,170,171,172,173,
173,174,175,176,177,178,178,179,180,181,182,183,183,184,185,186,
187,188,188,189,190,191,192,193,193,194,195,196,197,198,198,199,
200,201,202,203,204,204,205,206,207,208,209,209,210,211,212,213,
};
// VDP transfer slots in active display 32col mode. Transfer slots (Hint=0):
// 11,25,40,48,56,72,80,88,104,112,120,136,144,152,167,168
// Lookup table with 256 entries (16 rows of 16). The index is the 68k cycle
// count divided by 2, the value is the slot #. Values run from 0 (first
// entry) to 16 (last entry).
const unsigned char vdpcyc2sl_32[] = { // 68k cycles/2 to slot #
// 68k cycle count for each column of the first row; the trailing comment on
// each row is the 68k cycle count of that row's first entry:
// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 32
1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 64
2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, // 96
3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, // 128
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, // 160
5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, // 192
6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, // 224
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 256
8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, // 288
10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, // 320
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, // 352
11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, // 384
13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, // 416
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, // 448
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, // 480
};
// VDP transfer slots in active display 40col mode. Transfer slots (Hint=0):
// 21,47,55,63,79,87,95,111,119,127,143,151,159,175,183,191,206,207
// Lookup table with 256 entries (16 rows of 16). The index is the 68k cycle
// count divided by 2, the value is the slot #. Values run from 0 (first
// entry) to 18 (last entry).
const unsigned char vdpcyc2sl_40[] = { // 68k cycles/2 to slot #
// 68k cycle count for each column of the first row; the trailing comment on
// each row is the 68k cycle count of that row's first entry:
// 0 2 4 6 8 10 12 14 16 18 20 22 24 26 28 30
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, // 32
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 64
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 96
3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, // 128
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, // 160
5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, // 192
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 224
7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, // 256
9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, // 288
10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, // 320
12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, // 352
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, // 384
14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, // 416
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, // 448
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, // 480
};
// XXX inactive tables by cyc=slot#*488/maxslot#. should be through hv tables
// Inactive (blanked) display, 32col mode: lookup from slot # to 68k cycles/2.
// Holds 168 entries (slot # 0..167); values run from 0 to 246.
// NOTE(review): vdpcyc2sl_32_bl yields slot numbers up to 173 at its highest
// indices, which is beyond this table's range - confirm callers never look up
// a slot # above 167 here.
const unsigned short vdpsl2cyc_32_bl[] = { // slot # to 68k cycles/2
0, 2, 3, 5, 6, 8, 9, 11, 12, 14, 15, 17, 18, 20, 21, 23,
24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, 45, 46,
48, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 64, 65, 67, 68, 70,
71, 73, 74, 75, 77, 78, 80, 81, 83, 84, 86, 87, 89, 90, 92, 93,
95, 96, 98, 99,100,102,103,105,106,108,109,111,112,114,115,117,
118,120,121,122,124,125,127,128,130,131,133,134,136,137,139,140,
142,143,145,146,147,149,150,152,153,155,156,158,159,161,162,164,
165,167,168,170,171,172,174,175,177,178,180,181,183,184,186,187,
189,190,192,193,195,196,197,199,200,202,203,205,206,208,209,211,
212,214,215,217,218,220,221,222,224,225,227,228,230,231,233,234,
236,237,239,240,242,243,244,246,
};
// Inactive (blanked) display, 40col mode: lookup from slot # to 68k cycles/2.
// Holds 206 entries (slot # 0..205); values run from 0 to 246.
// NOTE(review): vdpcyc2sl_40_bl yields slot numbers up to 213 at its highest
// indices, which is beyond this table's range - confirm callers never look up
// a slot # above 205 here.
const unsigned short vdpsl2cyc_40_bl[] = { // slot # to 68k cycles/2
0, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18,
20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 38,
39, 40, 41, 42, 44, 45, 46, 47, 48, 50, 51, 52, 53, 54, 56, 57,
58, 59, 60, 61, 63, 64, 65, 66, 67, 69, 70, 71, 72, 73, 75, 76,
77, 78, 79, 81, 82, 83, 84, 85, 87, 88, 89, 90, 91, 93, 94, 95,
96, 97, 99,100,101,102,103,105,106,107,108,109,111,112,113,114,
115,117,118,119,120,121,122,124,125,126,127,128,130,131,132,133,
134,136,137,138,139,140,142,143,144,145,146,148,149,150,151,152,
154,155,156,157,158,160,161,162,163,164,166,167,168,169,170,172,
173,174,175,176,178,179,180,181,182,183,185,186,187,188,189,191,
192,193,194,195,197,198,199,200,201,203,204,205,206,207,209,210,
211,212,213,215,216,217,218,219,221,222,223,224,225,227,228,229,
230,231,233,234,235,236,237,239,240,241,242,243,244,246,
};
// Active display, 32col mode: lookup from slot # to 68k cycles/2.
// Entries 0..16 are the first index in vdpcyc2sl_32 that holds that slot #;
// the final entry (260) lies beyond that table's 0..255 index range.
const unsigned short vdpsl2cyc_32[] = { // slot # to 68k cycles/2
  [ 0] =   0,
  [ 1] =  16,
  [ 2] =  36,
  [ 3] =  56,
  [ 4] =  67,
  [ 5] =  79,
  [ 6] = 102,
  [ 7] = 113,
  [ 8] = 125,
  [ 9] = 148,
  [10] = 159,
  [11] = 171,
  [12] = 194,
  [13] = 205,
  [14] = 217,
  [15] = 239,
  [16] = 240,
  [17] = 260,
};
// Active display, 40col mode: lookup from slot # to 68k cycles/2.
// Entries 0..18 are the first index in vdpcyc2sl_40 that holds that slot #;
// the final entry (268) lies beyond that table's 0..255 index range.
const unsigned short vdpsl2cyc_40[] = { // slot # to 68k cycles/2
  [ 0] =   0,
  [ 1] =  24,
  [ 2] =  55,
  [ 3] =  64,
  [ 4] =  73,
  [ 5] =  92,
  [ 6] = 101,
  [ 7] = 110,
  [ 8] = 129,
  [ 9] = 138,
  [10] = 147,
  [11] = 166,
  [12] = 175,
  [13] = 184,
  [14] = 203,
  [15] = 212,
  [16] = 221,
  [17] = 239,
  [18] = 240,
  [19] = 268,
};
#ifndef _ASM_MISC_C
PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count)
{