diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..40b77daf --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,284 @@ +name: Picodrive CI +on: [push, pull_request] +jobs: + build-linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Install dependencies + run: | + sudo apt-get update -qq + sudo apt-get install -y libsdl1.2-dev libasound2-dev libpng-dev libz-dev + - name: configure + run: DUMP_CONFIG_LOG=1 ./configure + - name: make + run: make -j$(getconf _NPROCESSORS_ONLN) + + build-libretro: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: make + run: LDFLAGS=-Wl,--no-undefined make -j$(getconf _NPROCESSORS_ONLN) -f Makefile.libretro + + + build-gp2x: + runs-on: ubuntu-latest + container: ghcr.io/irixxxx/toolchain-gp2x + permissions: + packages: read + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=gph + make -j$(getconf _NPROCESSORS_ONLN) PLATFORM_MP3=0 + make -C platform/gp2x rel VER=$ver + mv PicoDrive_$ver.zip PicoDrive-gph_$ver.zip + - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: GP2X + path: PicoDrive-gph_*.zip + + build-pandora: + runs-on: ubuntu-latest + container: ghcr.io/irixxxx/toolchain-pandora + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=pandora + make -j$(getconf _NPROCESSORS_ONLN) + make -C platform/pandora rel VER=$ver + mv platform/pandora/PicoDrive_*.pnd . 
+ - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: Pandora + path: PicoDrive_*.pnd + + build-psp: + runs-on: ubuntu-latest + container: pspdev/pspdev + steps: + - name: build environment + run: | + apk add git gcc g++ zip + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + export CROSS_COMPILE=psp- + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=psp + make -j$(getconf _NPROCESSORS_ONLN) + make -C platform/psp rel VER=$ver + - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: PSP + path: PicoDrive_psp_*.zip + + build-ps2: + runs-on: ubuntu-latest + container: ps2dev/ps2dev + steps: + - name: build environment + run: | + apk add build-base cmake git zip make + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + export CROSS_COMPILE=mips64r5900el-ps2-elf- + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=ps2 + make -j$(getconf _NPROCESSORS_ONLN) + make -C platform/ps2 rel VER=$ver + - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: PS2 + path: PicoDrive_ps2_*.zip + + + build-dingux: + runs-on: ubuntu-latest + permissions: + packages: read + container: ghcr.io/irixxxx/toolchain-dingux + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=dingux + make -j$(getconf _NPROCESSORS_ONLN) + mv PicoDrive-dge.zip PicoDrive-dge-$ver.zip + - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: Dingux + path: PicoDrive-dge*.zip + + build-gcw0: + runs-on: ubuntu-latest + container: ghcr.io/irixxxx/toolchain-opendingux + steps: + - uses: actions/checkout@v4 + with: 
+ submodules: true + - name: build + run: | + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=opendingux-gcw0 + make -j$(getconf _NPROCESSORS_ONLN) + mv PicoDrive.opk PicoDrive-gcw0-$ver.opk + - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: GCW0 + path: PicoDrive-gcw0*.opk + + build-opendingux: + runs-on: ubuntu-latest + container: ghcr.io/irixxxx/toolchain-opendingux + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=opendingux + make -j$(getconf _NPROCESSORS_ONLN) + mv PicoDrive.opk PicoDrive-opendingux-$ver.opk + - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: Opendingux + path: PicoDrive-opendingux*.opk + + build-miyoo: + runs-on: ubuntu-latest + permissions: + packages: read + container: ghcr.io/irixxxx/toolchain-miyoo + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=miyoo + make -j$(getconf _NPROCESSORS_ONLN) + mv PicoDrive-miyoo.ipk PicoDrive-miyoo-$ver.ipk + - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: Miyoo + path: PicoDrive-miyoo*.ipk + + build-retrofw: + runs-on: ubuntu-latest + container: ghcr.io/irixxxx/toolchain-retrofw + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=retrofw + make -j$(getconf _NPROCESSORS_ONLN) + mv PicoDrive.opk PicoDrive-retrofw-$ver.opk + - name: artifacts + uses: actions/upload-artifact@v4 + 
with: + name: RetroFW + path: PicoDrive-retrofw*.opk + + build-odbeta-gcw0: + runs-on: ubuntu-latest + container: ghcr.io/irixxxx/toolchain-odbeta-gcw0 + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=odbeta + make -j$(getconf _NPROCESSORS_ONLN) + mv PicoDrive.opk PicoDrive-odbeta-gcw0-$ver.opk + - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: ODbeta gcw0 + path: PicoDrive-odbeta-*.opk + + build-odbeta-lepus: + runs-on: ubuntu-latest + container: ghcr.io/irixxxx/toolchain-odbeta-lepus + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=odbeta + make -j$(getconf _NPROCESSORS_ONLN) + mv PicoDrive.opk PicoDrive-odbeta-lepus-$ver.opk + - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: ODbeta lepus + path: PicoDrive-odbeta-*.opk + + build-odbeta-rg99: + runs-on: ubuntu-latest + container: ghcr.io/irixxxx/toolchain-odbeta-rs90 + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: build + run: | + git config --global --add safe.directory $PWD + ver=$(cut -d'"' -f2 platform/common/version.h)-$(git rev-parse --short HEAD) + ./configure --platform=odbeta + make -j$(getconf _NPROCESSORS_ONLN) + mv PicoDrive.opk PicoDrive-odbeta-rg99-$ver.opk + - name: artifacts + uses: actions/upload-artifact@v4 + with: + name: ODbeta rg99 + path: PicoDrive-odbeta-*.opk diff --git a/.github/workflows/prepare.yml b/.github/workflows/prepare.yml new file mode 100644 index 00000000..e4f4b552 --- /dev/null +++ b/.github/workflows/prepare.yml @@ -0,0 +1,19 @@ +name: prepare +on: workflow_call + +jobs: + prepare: + runs-on: ubuntu-latest + steps: + - uses: 
actions/checkout@v4 + with: + submodules: true + - name: create release + id: create_release + if: ${{ github.ref_type == 'tag' }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + tag=${GITHUB_REF#refs/tags/} + gh release create "$tag" --verify-tag -d -t "$tag" -n "$tag" + diff --git a/.gitignore b/.gitignore index 9f71eb92..430a4548 100644 --- a/.gitignore +++ b/.gitignore @@ -1,18 +1,42 @@ *.o *.swp +*.SFO +*.PBP +*.dll +*.lib +*.pdb +*.manifest +*.map +*.exp +*.dylib +*.so +*.a tags cscope.out -PicoDrive.map config.mak config.log cpu/musashi/m68kmake cpu/musashi/m68kops.c cpu/musashi/m68kops.h -skin config.cfg +skin srm/ brm/ mds/ +rom/ cfg/ libs/ obj/ +.opk_data +.od_data +PicoDrive +picodrive +picodrive_libretro* +PicoDrive*.opk +PicoDrive*.ipk +PicoDrive*.zip +PicoDrive*.elf +pico_int_offs.h +amalgamate +textfilter +.vscode/ diff --git a/.gitmodules b/.gitmodules index b27d2a16..799824ae 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,16 @@ [submodule "platform/libpicofe"] path = platform/libpicofe - url = git://notaz.gp2x.de/~notaz/libpicofe.git + url = https://github.com/raytf/libpicofe.git [submodule "cpu/cyclone"] path = cpu/cyclone - url = git://notaz.gp2x.de/~notaz/cyclone68000.git + url = https://github.com/irixxxx/cyclone68000.git +[submodule "pico/sound/emu2413"] + path = pico/sound/emu2413 + url = https://github.com/digital-sound-antiques/emu2413.git + branch = main +[submodule "pico/cd/libchdr"] + path = pico/cd/libchdr + url = https://github.com/irixxxx/libchdr.git +[submodule "platform/common/dr_libs"] + path = platform/common/dr_libs + url = https://github.com/mackron/dr_libs.git diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 00000000..e5ed7580 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,62 @@ +notaz +core, 32X emulation, CD code, ARM asm renderers, dynamic recompilers, +Pandora, GPH device, PSP, Gizmondo ports, CPU core hacks +lots of additional coding (see changeLog).
+Homepage: http://notaz.gp2x.de/ + +irixxxx +improvements to dynamic recompilers, 32X emulation, ARM asm, sound, VDP, +platforms (GPH, PSP, generic linux), added SG-1000 and Game Gear support, +fixed a lot of bugs (and probably added more new bugs), cleaned up stuff, +probably more that I've already forgotten. + +fDave +project starter +Cyclone 68000 core and PicoDrive core itself + +Chui +FAME/C 68k interpreter core +(based on C68K by Stephane Dallongeville) + +Stephane Dallongeville (written), NJ (optimized) +CZ80 Z80 interpreter core + +Reesy & FluBBa +DrZ80, the Z80 interpreter written in ARM assembly. +Homepage: http://reesy.gp32x.de/ (defunct) + +Tatsuyuki Satoh, Jarek Burczynski, MAME development +software implementation of Yamaha FM sound generator + +MAME development +Texas Instruments SN76489 / SN76496 programmable tone/noise generator +Homepage: http://www.mame.net/ + +Eke-Eke +CD graphics processor and CD controller implementation (from Genesis Plus GX) + +Additional thanks +----------------- + +* Charles MacDonald (http://cgfm2.emuviews.com/) for old but still very useful + info about genesis hardware. +* Steve Snake for all that he has done for Genesis emulation scene. +* Stephane Dallongeville for writing Gens and making it open source. +* Tasco Deluxe for his reverse engineering work on SVP and some mappers. +* Bart Trzynadlowski for his SSFII and 68000 docs. +* Haze for his research (http://mamedev.emulab.it/haze/). +* Lordus, Exophase and Rokas for various ideas. +* Nemesis for his YM2612, VDP research and docs. +* Eke-Eke for sharing the knowledge and his work on Genesis Plus GX. +* Many posters at spritesmind.net forums for valuable information. +* Mark and Jean-loup for zlib library. +* ketchupgun for the skin. +* GP2X specific help: rlyeh, Squidge, Dzz, A_SN, Alex and GP32X posters. +* Gizmondo code: Kingcdr, Reesy, jens.l (for the device itself) +* Hardware: craigix (GP2X), EvilDragon (Wiz, Caanoo, Pandora, ...) 
+ and jens.l (Gizmondo) +* Paul Cercueil for OpenDingux port. +* Inder for some graphics. +* squarepusher for some libretro fixes +* Hiroshica for support of japanese Mark-III extended YM2413 sound +* Anyone else I forgot. Let me know if it's you. diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 00000000..a8ba44eb --- /dev/null +++ b/ChangeLog @@ -0,0 +1,487 @@ +1.91 (2013-10-12) + + Added OpenDingux support (Paul Cercueil). + * Save directory changed to ~/.picodrive/ for generic platform build + (Paul Cercueil). + + Revived GP2X/Caanoo/Wiz support. + + Switched to cleaner CD controller code from Eke-Eke's Genesis Plus GX. + * Fixed overflow issue where cd emulation would break after + ~10 minutes of gameplay. + * Fixed synchronization issue where model1 CD BIOS would randomly hang. + +1.90 (2013-09-24) + + 32X+CD emulation has been implemented. + + CD graphics processor code has been replaced with much cleaner Eke-Eke's + implementation from Genesis Plus GX. + + CD PCM code has been completely rewritten. + * Various CD compatibility issues have been solved. Hopefully no more + regressions this time. + * pandora: fixed tv-out (again), added automatic layer switching + * libretro: fixed crackling sound for some games, added some core options + * sdl: multiple joystick support has been fixed (Victor Luchits) + +1.85 (2013-08-31) + * Lots of 32X compatibility and accuracy improvements. All commercial games + are booting now, but some still have issues. + * Fixed some regressions in MegaCD code, like hang in jap BIOS. + * Implemented pause for SMS. + * Updated UI with improvements from PCSX ReARMed. + * Frontend timing has been rewritten, should no longer slowly desync from + LCD on pandora. + * Added libretro and SDL 32/64bit ports, fixed compatibility issues with + Android, iOS. + * Various other things I forgot (it has been a while since last release..) + +1.80 (2010-09-19) + + Added Caanoo support. 
Now the GP2X binary supports GP2X F100/F200, Wiz + and Caanoo. Lots of internal refactoring to support this. + + Enabled 32X and SMS code. It's still unfinished but better release something + now than wait even more (it has been in development for more than a year now + due to various other projects or simply lack of time). + + Pandora: added hardware scaler support, including ability to resize the + layer and control filtering. + + GP2X: Added basic line-doubling vertical scaling option. + * Changed the way keys are bound, no need to unbind old one any more. + * Handle MP3s with ID3 tags better (some MP3s with ID3 did not play). + * Improved shadow/hilight color levels. + * Fixed broken cheat support. + +1.80beta2 + * Pandora: updated documentation. + +1.80beta1 (2010-06-02) + + Added pandora port. + * Internal refactoring for 32x/SMS support. + * Move mapper database to external file. + + Added preliminary SMS emulation. + + Added emulation of 32x peripherals including VDP. More work is needed here. + + ARM: Added new SH2 recompiler for 32x. Some unification with SVP one. + - Disabled most of the above because I'm not yet happy with the results. + +1.56 (2009-09-19) + * Changed sync in Sega CD emulation again. Should fix games that + broke after changes in 1.51a. + * Fixed default keys rebinding when they shouldn't. + * Fixed sram being loaded from wrong game. + * Emu should no longer hang shortly after using fast-forward. + * Fixed save states sometimes no longer showing up in save state menu. + * ARM: some asm code refactoring for slight speed improvement. + +1.55 + + Added Wiz support. Now the same GP2X binary supports F100/F200 and Wiz. + * Changed shadow/hilight handling a bit, fixes some effects in Pirates! Gold. + * Complete input code rewrite. This fixes some limitations like not allowing + to control both players using single input device. It also allows to use + more devices (like keyboards) on Linux based devices.
+ * Options menu has been reordered, "restore defaults" option added. + +1.51b + * Fixed a crash when uncompressed savestate is loaded. + * Fixed an idle loop detection related hanging problem. + * PSP: fixed another palette related regression. + * UIQ3: updated frontend for the latest emu core. + +1.51a + * Fixed a sync problem between main and sub 68k. Should fix the hanging + problem for some games. + * ARM: fixed a crash when CD savestate is loaded just after loading ROM. + +1.51 + * Improved bin_to_cso_mp3 tool, it should no longer complain about + missing lame.exe even if it's in working dir. + * Fixed a regression from 1.50, which caused slowdowns in Final Fight. + * Fixed some regressions from 1.50 related to sprite limit and palette + handling (caused graphical glitches in some games). + + Added ABC turbo actions to key config. + * Some other minor adjustments. + +1.50 + + Added some basic support for Sega Pico, a MegaDrive-based toy. + + Added proper support for cue/bin images, including cdda playback. + .cue sheets with iso/cso/mp3/wav files listed in them are now + supported too (but 44kHz restriction still applies). + + Added bin_to_cso_mp3 tool, based on Exophase's bin_to_iso_ogg. + The tool can convert .cue/.bin Sega CD images to .cso/.mp3. + * Greatly improved Sega CD load times. + * Changed how scheduling between 68k and z80 is handled. Improves + performance for some games. Credits to Lordus for the idea. + * YM2612 state was not 100% saved, this should be better now. + * Improved renderer performance for shadow/hilight mode. + * Added a hack for YM2612 frequency overflow issue (bleep noises + in Shaq Fu, Spider-Man - The Animated Series (intro music), etc.) + Credits to Nemesis @ spritesmind forum. Works only if sound rate + is set to 44kHz. + + Implemented some sprite rendering improvements, as suggested by + Exophase. Games with lots of sprites now perform better. + + Added better idle loop detection, based on Lordus' idea again. 
+ - "accurate timing" option removed, as disabling it no longer + improves performance. + - "accurate sprites" was removed too, the new sprite code can + properly handle sprite priorities in all cases. + * Timers adjusted again. + * Improved .smd detection code. + * ARM: fixed a bug in DrZ80 core, which could cause problems in + some rare cases. + * ARM: fixed a problem of occasional clicks on MP3 music start. + * Minor general optimizations and menu improvements. + * Fixed a bug in Sega CD savestate loader, where the game would + sometimes crash after load. + * Fixed a crash of games using eeprom (introduced in 1.40b). + * PSP: fixed suspend/resume (hopefully for real). + +1.40c + * Fixed a problem with sound in Marble Madness. + * GP2X: Fixed minor problem with key config. + +1.40b + * Fixed sprite masking code. Thanks to Lordus for explaining how it works. + + Added "disable sprite limit" option. + + PSP: added black level adjustment to display options. + * Changed reset to act as 'soft' reset. + + Added detection for Puggsy (it doesn't really have sram). + * Some small timing adjustments. + +1.40a + * GP2X: Fixed a binding problem with up and down keys. + * Default game config no longer overrides global user config. + +1.40 + + Added support for SVP (Sega Virtua Processor) to emulate Virtua Racing, + wrote ARM recompiler and some HLE code for VR. Credits to Exophase and + Rokas for various ideas. + * Changed config file format, files are now human-readable. Game specific + configs are now held in single file (but old game config files are still + read when new one is missing). + * Fixed a bug where some key combos didn't work as expected. + * Fixed a regression in renderer (ARM ports only, some graphic glitches in + rare cases). + * Adjusted fast renderer to work with more games, including VR. + * Fixed a problem where SegaCD RAM cart data was getting lost on reset. 
+ * GP2X: Greatly reduced SegaCD FMV game slowdowns by disabling read-ahead + in the Linux kernel and C library (thanks to Rokas and Exophase for ideas + again). Be sure to keep "ReadAhead buffer" OFF to avoid slowdowns. + + PicoDrive now comes with a game config file for some games which need + special settings, so they should now work out-of-the-box. More games will + be added with later updates. + + GP2X: Files now can be deleted by pressing A+SELECT in the file browser. + +1.35b + * PSP: mp3 code should no longer fail on 1.5 firmware. + + PSP: added gamma adjustment option. + + Added .cso ISO format support. Useful for non-FMV games. + * It is now possible to force a region after the ROM is loaded. + * Fixed a sram bug in memhandlers (fixes Shining in the Darkness saves). + * PSP: fixed another bug in memhandlers, which crashed the emu for some games + (like NBA Jam and NHL 9x). + + PSP: added suspend/resume handling for Sega CD games. + + GP2X: added additional low volume levels for my late-night gaming sessions + (in stereo mode only). + + GP2X: added "fast forward" action in key config. Not recommended to use for + Sega CD, may cause problems there. + * Some other small tweaks I forgot about. + +1.35a + * PSP: fixed a bug which prevented to load any ROMs after testing the BIOS. + * PSP: fixed incorrect CZ80 memory map setup, which caused Z80 crashes and + graphics corruption in EU Mega CD model1 BIOS menus. + + PSP: added additional "set to 4:3 scaled" display option for convenience. + + PSP: Added an option to disable frame limiter (works only with non-auto frameskip). + +1.35 + + PSP port added. Lots of new code for it. Integrated modified FAME/C, CZ80 cores. + + Some minor generic optimizations. + * Patched some code which was crashing under PSP, but was working in GP2X/Giz + (although it should have crashed there too). + * Readme updated. + +1.34 + + Gizmondo port added. + + Some new optimizations in memory handlers, and for shadow/hilight mode.
+ + Added some hacks to make more games work without enabling "accurate timing". + * Adjusted timing for "accurate timing" mode and added preliminary VDP FIFO + emulation. Fixes Double Dragon 2, tearing in Chaos Engine and some other games. + * Fixed a few games not having sound at startup. + * Updated serial EEPROM code to support more games. Thanks to EkeEke for + providing info about additional EEPROM types and game mappers. + * The above change fixed hang of NBA Jam. + * Minor adjustments to control configurator. + +1.33 + * Updated Cyclone core to 0.0088. + + Added A r k's usbjoy fix. + + Added "perfect vsync" option, which adjusts GP2X LCD refresh rate and syncs + emulation to it to eliminate tearing and ensure smoothest scrolling possible. + + Added an option to use A_SN's gamma curve for gamma correction (improves dark + and bright color display for mk2s). + * Sometimes stray sounds were played after loading a savestate. Fixed. + * Fixed a problem where >6MB mp3s were corrupted in memory (sound glitches in + Snatcher). + * PD no longer overwrites video player code in memory, video player now can be + used after exiting PicoDrive. + * Fixed a bug which was causing Sonic 3 code to deadlock in some rare conditions + if "accurate timing" was not enabled. + * Fixed support for large hacked ROMs like "Ultimate Mortal Kombat Trilogy". + Upto 10MB hacked ROMs are supported now. + + Config profiles added (press left/right when saving config). + * Changed key configuration behavior to the one from gpfce (should be more + intuitive). + + Added some skinning capabilities to the menu system with default skin by + ketchupgun. Delete skin directory if you want old behaviour. + * Some other little tweaks I forgot about. + +1.32 + + Added some new scaling options. + + Added ability to reload CD images while game is running (needed for games + with multiple CDs, like Night Trap). + + Added RAM cart emulation. 
+ * Fixed DMA timing emulation (caused lock-ups for some genesis games). + * Idle loop detection was picking up wrong code and causing glitches, fixed. + * The ym2612 code on 940 now can handle multiple updates per frame + (fixes Thunder Force III "seiren" level drums for example). + * Memory handlers were ignoring some writes to PSG chip, fixed (missing sounds in + Popful Mail, Silpheed). + * Improved z80 timing, should fix some sound problems. + * Fixed a bug with sram register (fixes Phantasy Star 4). + * ROM loader was incorrectly identifying some ROMs as invalid. Fixed. + * Added code for PRG ram write protection register (Dungeon Explorer). + * The memory mode register change in 1.31 was unsafe and caused some glitches in + AH-3 Thunderstrike. Fixed. + * Fixed a file descriptor leak. + * Updated documentation, added Gmenu2x manual. + +1.31 + * Changed the way memory mode register is read (fixes Lunar 2, broken in 1.30). + * Fixed TAS opcode on sub-68k side (fixes Batman games). + * File browser now filters out mp3s, saves and some other files, which are not ROMS. + +1.30 + + ISO files now can be zipped. Note that this causes VERY long loading times. + + Added data pre-buffering support, this allows to reduce frequency of short pauses + in FMV games (caused by SD access), but makes those pauses longer. + * Fixed PCM DMA transfers (intro FMV in Popful Mail). + + Properly implemented "decode" data transformation (Jaguar XJ220). + * Integrated "better sync" code into cyclone code, which made this mode much faster. + * Fixed a bug related to game specific config saving. + * Frameskipper was skipping sound processing, which caused some audio desyncs. Fixed. + * Fixed reset not working for some games. + + New assembly optimized memory handlers for CD (gives at least a few fps). + Also re-enabled all optimizations from 0.964 release. + + New idle-loop detection code for sub-68k. Speeds up at least a few games.
+ +1.201 + + Added basic cheat support (GameGenie and Genecyst patches). + +1.20 + * Fixed a long-standing problem in audio mixing code which caused slight distortions + at lower sample rates. + * Changed the way 920 and 940 communicate (again), should be more reliable and give + slight performance increase. + * Some optimizations in audio mixing code. + * Some menu changes (background added, smaller font in ROM browser, savestate loader + now can select slots). + + 1M mode DMA transfers implemented (used by FMV games like Night Trap and Sewer Shark). + + Games now can run code from WORD RAM in 1M mode (fixes Adventures of Willy Beamish). + + "Cell arrange" address mapping is now emulated (Heart of the alien). + + "Color numeric operation" is now emulated (text in Lunar 2, Silpheed intro graphics). + + "Better sync" option added (prevents some games from hanging). + +1.14 + + Region autodetection now can be customized. + * When CDDA music tracks changed, old buffer contents were incorrectly played. Fixed. + * BRAM is now automatically formatted (no need to enter BIOS menu and format any more). + * Games now can be reset, CDDA music no longer breaks after loading another ISO. + * Fixed a race condition between 920 and 940 which sometimes caused CDDA music not to play. + + Savestates implemented for Sega/Mega CD. + + PCM sound added. + * Some mixer code rewritten in asm. 22kHz and 11kHz sound rates are now supported in + Mega CD mode (but mp3s must still be 44kHz stereo). + + Timer emulation added. + * CDC DMA transfers fixed. Snatcher and probably some more games now boot. + * 2M word RAM -> VDP transfers fixed, no more corruption in Ecco and some other games. + +1.10 + + GP2X: Added experimental Sega CD support. + + GP2X: Added partial gmv movie playback support. + +0.964 (2006-12-03) + * GP2X: Fixed a sound buffer underflow issue on lower sample rate modes, which was + happening for NTSC games and causing sound clicks.
+ * GP2X: Redone key config to better support USB joysticks (now multiple joysticks + should be useable and configurable). + + GP2X: Added save confirmation option. + + GP2X: Added 940 CPU crash detection. + + ALL: UIQ3 port added. + +0.963 + * GP2X: Gamma-reset-on-entering-menu bug fixed. + * GP2X: Recompiled PicoDrive with gcc profiling option set as described here: + http://www.gp32x.com/board/index.php?showtopic=28490 + +0.962 + * GP2X: Fixed an issue with incorrect sounds in some games when dualcore operation + was enabled (for example punch sound in SOR). + * GP2X: Limited max volume to 90, because higher values often cause distortions. + * GP2X: Fixed a bug with lower res scaling. + * GP2X: Gamma is now reset on exit. + +0.96 + * ALL: Severely optimized MAME's YM2612 core, part of it is now rewritten in asm. + + GP2X: The YM2612's code now can be run in GP2X's ARM940T CPU, what causes large + performance increase. + * ALL: Accurate renderers are slightly faster now. + + GP2X: Using quadruple buffering instead of doublebuffer now, also updated + framelimitter, this should eliminate some scrolling and tearing problems. + * GP2X: Fixed some flickering issues of 8bit accurate renderer. + + GP2X: craigix's RAM timings now can be enabled in the menu (see advanced options). + + GP2X: Added ability to save config for specific games only. + + GP2X: Gamma control added (using GP2X's hardware capabilities for this). + * GP2X: Volume keys are now configurable. + + GP2X: GnoStiC added USB joystick support, I made it possible to use it for + player 2 control (currently untested). + * GP2X: squidgehack is now applied through kernel module (cleaner way). + +0.95 + * ALL: Fixed a bug in sprite renderer which was causing slowdowns for some games. + + GP2X: Added command line support + + GP2X: Added optional hardware scaling for lower-res games like Shining Force. + * ALL: Sound chips are now sampled 2 times per frame. 
This fixed some games which + had missing sounds (Vectorman 2 1st level, Thunder Force 3 water level, + etc.). + + ALL: Added another accurate 8-bit renderer which is slightly faster and made it + default. + +0.945 + + GP2X: Added frame limiter for frameskipped modes. + * GP2X: Increased brightness a bit (unused pixel bits now also contain data). + * GP2X: Squidgehack was not applied correctly (was applied before allocating some + high memory and had no effect). + +0.94 + + Added GP2X port. + * Improved interrupt timing, Mazin Saga and Burning Force now work. + * Rewritten renderer code to better suit GP2X, should be faster on other + ports too. + + Added support for banking used by 12-in-1 and 4-in-1 ROMs (thanks Haze). + + Added some protection device faking, used by some unlicensed games like + Super Bubble Bobble, King of Fighters, Elf Wor, ... (thanks to Haze again) + + Added primitive Virtua Racing SVP faking, so menus can be seen now. + +0.93 + * Fixed a problem with P900/P910 key configuration in FC mode. + * Improved shadow/hilight mode emulation. Still not perfect, but should be + enough for most games. + + Save state slots added. + + Region selector added. + +0.92 + VDP changes: + * VDP emulation is now more accurate (fixes flickering in Chase HQ II, + Super Hang-On and some other problems in other games). + * HV counter emulation is now much more accurate. Fixes the Asterix games, + line in Road Rash 3, etc. + * Minor sprite and layer scroll masking bugs fixed. + + Added partial interlace mode renderer (Sonic 2 vs mode) + * Fixed a crash in both renderers when certain size window layers were used. + + Added emulation of shadow/hilight operator sprites. Other shadow/hilight + effects are still unemulated. + + Sprite emulation is more accurate, sprite limit is emulated. + + Added "accurate sprites" option, which always draws sprites in correct + order and emulates sprite collision bit, but is significantly slower.
+ + Emulation changes: + * Improved interrupt handling, added deferred interrupt emulation + (Lemmings, etc). + + Added serial EEPROM SRAM support (Wonder Boy in Monster World, + Megaman - The Wily Wars and many EA sports games like NBA Jam). + + Implemented ROM banking for Super Street Fighter II - The New Challengers + * Updated to the latest version of DrZ80 core, integrated memory handlers + in it for better performance. A noticeable performance increase, but save + states may not work from the previous version (you can only use them with + sound disabled in that case). + + SRAM word read handler was using incorrect byte order, fixed. + + Changes in Cyclone 0.0086: + + Added missing CHK opcode handler (used by SeaQuest DSV). + + Added missing TAS opcode handler (Gargoyles,Bubba N Stix,...). As in real genesis, + memory write-back phase is ignored (but can be enabled in config.h if needed). + + Added missing NBCD and TRAPV opcode handlers. + + Added missing addressing mode for CMP/EOR. + + Added some minor optimizations. + - Removed 216 handlers for 2927 opcodes which were generated for invalid addressing modes. + + Fixed flags for ASL, NEG, NEGX, DIVU, ADDX, SUBX, ROXR. + + Bugs fixed in MOVEP, LINK, ADDQ, DIVS handlers. + * Undocumented flags for CHK, ABCD, SBCD and NBCD are now emulated the same way as in Musashi. + + Added Uninitialized Interrupt emulation. + + Altered timing for about half of opcodes to match Musashi's. + +0.80 + * Nearly all VDP code was rewritten in ARM asm. Gives ~10-25% performance + increase (depends on game). + * Optimized 32-column renderer not to render tiles offscreen, games which + use 32-column display (like Shining Force) run ~50% faster. + + Added new "Alternative renderer", which gives another ~30-45% performance + increase (in addition to mentioned above), but works only with some games, + because it is missing some features (it uses tile-based rendering + instead of default line-based and disables H-ints). 
+ + Added "fit2" display mode for all FC gamers. It always uses 208x146 for + P800 and 208x208 for all other phones. + + Added volume control for Motorolas (experimental). + + VDP changes: + + Added support for vertical window (used by Vapor Trail, Mercs, GRIND + Stormer and others). + + Added sprite masking (hiding), adds some speed. + + Added preliminary H counter emulation. Comix Zone and Sonic 3D Blast + special stage are now playable. + + Added column based vertical scrolling (Gunstar Heroes battleship level, + Sonic and Knuckles lava boss, etc). + + Emulation changes: + + Re-added and improved Z80 faking when Z80 is disabled. Many games now can + be played without enabling Z80 (Lost Vikings, Syndicate, etc), but some + still need it (International Superstar Soccer Deluxe). + * Improved ym2612 timers, Outrun music plays at correct speed, voices in + Earthworm Jim play better, more games play sound. + * I/O registers now remember their values (needed for Pirates! Gold) + + Added support for 6 button pad. + + Changes in Cyclone 0.0083wip: + + Added missing CHK opcode (used by SeaQuest DSV). + + Added missing TAS opcode (Gargoyles). As in real genesis, write-back phase + is ignored (but is enabled for other systems). + + Backported stuff from Snes9x: + * Fixed Pxxx jog up/down which were not working in game. + + Added an option to gzip save states to save space. + + The emulator now pauses whenever it is losing focus, so it will now pause + when alarm/phonecall/battery low/... windows come up. + - Removed 'pause on phonecall' feature, as it is no longer needed. + + Video fix for asian A1000s. + +0.70 + * Started using tools from "Symbian GCC Improvement Project", which give + considerable speed increase (~4fps in "center 90" mode). + * Rewrote some drawing routines in ARM assembly (gives ~6 more fps in + "center 90" mode). + * Minor improvement to 0 and 180 "fit" modes. Now they look slightly better + and are faster.
+ * Minor stability improvements (emulator is less likely to crash). + + Added some background for OSD text for better readability. + + Added Pal/NTSC detection. This is needed for proper sound speed. + + Implemented Reesy's DrZ80 Z80 emu. Made some changes to it with hope to make + it faster. + + Implemented ym2612 emu from the MAME project. Runs well but sometimes sounds + a bit weird. Could be a little faster, so made some changes too. + + Implemented SN76489 emu from the MAME project. + + Added two separate sound output methods (mediaserver and cmaudiofb) with + autodetection (needs testing). + * Fixed VDP DMA fill emulation (as described in Charles MacDonald's docs), + fixes Contra and some other games. + +0.301 + Launcher: + * Launcher now starts emulation process from current directory, + not from hardcoded paths. + * Improved 'pause on call' feature, should hopefully work with Motorola phones. + +0.30 (2006-01-07) + Initial release based on fDave's code. diff --git a/Makefile b/Makefile index cef431cc..4381b5a7 100644 --- a/Makefile +++ b/Makefile @@ -1,15 +1,10 @@ -TARGET ?= PicoDrive -CFLAGS += -Wall -ggdb -falign-functions=2 -CFLAGS += -I. -ifndef DEBUG -CFLAGS += -O2 -DNDEBUG -endif -#CFLAGS += -DEVT_LOG -#CFLAGS += -DDRC_CMP -#cpu_cmp = 1 -#drc_debug = 7 -#profile = 1 - +$(LD) ?= $(CC) +TARGET ?= picodrive +ASAN ?= 0 +DEBUG ?= 0 +CFLAGS += -I$(PWD) +CYCLONE_CC ?= gcc +CYCLONE_CXX ?= g++ all: config.mak target_ @@ -28,7 +23,70 @@ else # NO_CONFIG_MAK config.mak: endif +# This is actually needed, believe me - one bit is used as a flag in some tables +# If you really have to disable this, set NO_ALIGN_FUNCTIONS elsewhere. 
+ifndef NO_ALIGN_FUNCTIONS +CFLAGS += -falign-functions=2 +endif + +# profiling +pprof ?= 0 +gperf ?= 0 + +ifneq ("$(PLATFORM)", "libretro") + CFLAGS += -Wall -g +ifneq ("$(PLATFORM)", "psp") +ifneq ($(findstring gcc,$(shell $(CC) -v 2>&1)),) + CFLAGS += -ffunction-sections -fdata-sections + LDFLAGS += -Wl,--gc-sections +endif +endif + +ifeq "$(ASAN)" "1" + CFLAGS += -fsanitize=address -fsanitize=leak -fsanitize=bounds -fno-omit-frame-pointer -fno-common -O1 -g + LDLIBS += -fsanitize=address -fsanitize=leak -fsanitize=bounds -static-libasan +else +ifeq "$(DEBUG)" "0" + CFLAGS += -O3 -DNDEBUG +else + CFLAGS += -O1 +endif +endif + LD = $(CC) + OBJOUT ?= -o + LINKOUT ?= -o +endif + + +chkCCflag = $(shell n=/dev/null; echo $(1) | tr " " "\n" | while read f; do \ + $(CC) $$f -x c -c $$n -o $$n 2>$$n && echo "_$$f" | tr -d _; done) + +ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","gp2x" "opendingux" "rpi1")) +# very small caches, avoid optimization options making the binary much bigger +CFLAGS += -fno-common -finline-limit=42 -fno-unroll-loops -ffast-math +CFLAGS += $(call chkCCflag, -fno-stack-protector) +ifneq ($(call chkCCflag, -fipa-ra),) # gcc >= 5 +CFLAGS += $(call chkCCflag, -flto -fipa-pta -fipa-ra) +else +# these improve execution speed on 32bit arm/mips with gcc pre-5 toolchains +CFLAGS += $(call chkCCflag, -fno-caller-saves -fno-guess-branch-probability -fno-regmove) +# very old gcc toolchains may not have these options +CFLAGS += $(call chkCCflag, -fno-tree-loop-if-convert -fipa-pta -fno-ipa-cp) +endif +else +ifneq ($(STATIC_LINKING), 1) +CFLAGS += $(call chkCCflag, -flto) +endif +endif + +# revision info from repository if this not a tagged release +ifeq "$(shell git describe --tags --exact-match HEAD 2>/dev/null)" "" +GIT_REVISION ?= -$(shell git rev-parse --short HEAD || echo unknown) +endif +CFLAGS += -DREVISION=\"$(GIT_REVISION)\" + # default settings +use_libchdr ?= 1 ifeq "$(ARCH)" "arm" use_cyclone ?= 1 use_drz80 ?= 1 @@ -39,42 +97,120 @@ asm_memory ?= 1 
asm_render ?= 1 asm_ym2612 ?= 1 asm_misc ?= 1 -asm_cdpico ?= 1 asm_cdmemory ?= 1 asm_mix ?= 1 -else # if not arm +asm_32xdraw ?= 1 +asm_32xmemory ?= 1 +else use_fame ?= 1 use_cz80 ?= 1 +ifneq (,$(filter x86% i386% i686% mips% aarch% riscv% powerpc% ppc%, $(ARCH))) +use_sh2drc ?= 1 +endif endif -include Makefile.local -ifneq "$(use_cyclone)" "1" -# due to CPU stop flag access -asm_cdpico = 0 -asm_cdmemory = 0 +# TODO this should somehow go to the platform directory? +ifeq "$(PLATFORM)" "generic" +PicoDrive.zip: $(TARGET) + $(RM) -rf .od_data + mkdir .od_data + cp -r platform/linux/skin .od_data + cp platform/game_def.cfg .od_data + $(STRIP) $< -o .od_data/picodrive + cd .od_data && zip -9 -r ../$@ * +all: PicoDrive.zip endif -# frontend -ifeq "$(PLATFORM)" "generic" +ifeq "$(PLATFORM)" "opendingux" +.od_data: $(TARGET) + $(RM) -rf .od_data + mkdir .od_data + cp -r platform/opendingux/data/. .od_data + cp platform/game_def.cfg .od_data + $(STRIP) $< -o .od_data/picodrive +.PHONY: .od_data + +ifneq (,$(filter %__DINGUX__, $(CFLAGS))) +# "legacy" dingux without opk support +PicoDrive-dge.zip: .od_data + rm -f .od_data/default.*.desktop + cd .od_data && zip -9 -r ../$@ * +all: PicoDrive-dge.zip +CFLAGS += -DSDL_SURFACE_SW # some legacy dinguces had bugs in HWSURFACE +else +ifneq (,$(filter %__MIYOO__, $(CFLAGS))) +PicoDrive-miyoo.ipk: .od_data + rm -f .od_data/default.*.desktop .od_data/PicoDrive.dge + gm2xpkg -i platform/miyoo/pkg.cfg + mv picodrive.ipk $@ + @gm2xpkg -c platform/miyoo/pkg.cfg >/dev/null 2>&1 +all: PicoDrive-miyoo.ipk +else +PicoDrive.opk: .od_data + rm -f .od_data/PicoDrive.dge + mksquashfs .od_data $@ -all-root -noappend -no-exports -no-xattrs +all: PicoDrive.opk +endif +endif + +OBJS += platform/opendingux/inputmap.o +use_inputmap ?= 1 + +# OpenDingux is a generic platform, really. 
+PLATFORM := generic +endif +ifeq ("$(PLATFORM)",$(filter "$(PLATFORM)","rpi1" "rpi2")) +CFLAGS += -DHAVE_GLES -DRASPBERRY +CFLAGS += -I/opt/vc/include/ -I/opt/vc/include/interface/vcos/pthreads/ -I/opt/vc/include/interface/vmcs_host/linux/ +LDFLAGS += -ldl -lbcm_host -L/opt/vc/lib +# Stupid renaming occurred in latest raspbian... +ifneq (,$(wildcard /opt/vc/lib/libbrcmGLESv2.so)) +LDFLAGS += -lbrcmEGL -lbrcmGLESv2 +else +LDFLAGS += -lEGL -lGLESv2 # on raspi GLESv1_CM is included in GLESv2 +endif OBJS += platform/linux/emu.o platform/linux/blit.o # FIXME -OBJS += platform/common/plat_sdl.o +OBJS += platform/common/plat_sdl.o platform/common/input_sdlkbd.o OBJS += platform/libpicofe/plat_sdl.o platform/libpicofe/in_sdl.o -OBJS += platform/libpicofe/plat_dummy.o +OBJS += platform/libpicofe/linux/plat.o +USE_FRONTEND = 1 +endif +ifeq "$(PLATFORM)" "generic" +#ifeq (y,$(shell echo "\#include " | $(CC) -E -xc - >/dev/null 2>&1 && echo y)) +ifeq "$(HAVE_GLES)" "1" +CFLAGS += -DHAVE_GLES +LDFLAGS += -lEGL -lGLESv1_CM +endif +CFLAGS += -DSDL_OVERLAY_2X -DSDL_BUFFER_3X -DSDL_REDRAW_EVT +OBJS += platform/linux/emu.o platform/linux/blit.o # FIXME +ifeq "$(use_inputmap)" "1" +OBJS += platform/common/plat_sdl.o platform/opendingux/inputmap.o +else +OBJS += platform/common/plat_sdl.o platform/common/inputmap_kbd.o +endif +OBJS += platform/libpicofe/plat_sdl.o platform/libpicofe/in_sdl.o +OBJS += platform/libpicofe/linux/plat.o USE_FRONTEND = 1 endif ifeq "$(PLATFORM)" "pandora" platform/common/menu_pico.o: CFLAGS += -DPANDORA +platform/libpicofe/linux/plat.o: CFLAGS += -DPANDORA OBJS += platform/pandora/plat.o OBJS += platform/pandora/asm_utils.o OBJS += platform/common/arm_utils.o +OBJS += platform/libpicofe/linux/in_evdev.o OBJS += platform/libpicofe/linux/fbdev.o OBJS += platform/libpicofe/linux/xenv.o +OBJS += platform/libpicofe/linux/plat.o OBJS += platform/libpicofe/pandora/plat.o USE_FRONTEND = 1 endif ifeq "$(PLATFORM)" "gp2x" OBJS += platform/common/arm_utils.o +OBJS +=
platform/libpicofe/linux/in_evdev.o +OBJS += platform/libpicofe/linux/plat.o OBJS += platform/libpicofe/gp2x/in_gp2x.o OBJS += platform/libpicofe/gp2x/soc.o OBJS += platform/libpicofe/gp2x/soc_mmsp2.o @@ -88,22 +224,69 @@ OBJS += platform/gp2x/vid_mmsp2.o OBJS += platform/gp2x/vid_pollux.o OBJS += platform/gp2x/warm.o USE_FRONTEND = 1 -PLATFORM_MP3 = 1 +PLATFORM_MP3 ?= 1 +endif +ifeq "$(PLATFORM)" "psp" +CFLAGS += -DUSE_BGR565 -G8 # -DLPRINTF_STDIO -DFW15 +LDLIBS += -lpspgu -lpspge -lpsppower -lpspaudio -lpspdisplay -lpspaudiocodec +LDLIBS += -lpspctrl +platform/common/main.o: CFLAGS += -Dmain=pico_main +OBJS += platform/psp/plat.o +OBJS += platform/psp/emu.o +OBJS += platform/psp/in_psp.o +OBJS += platform/psp/psp.o +OBJS += platform/psp/asm_utils.o +OBJS += platform/psp/mp3.o +USE_FRONTEND = 1 +endif +ifeq "$(PLATFORM)" "ps2" +CFLAGS += -DUSE_BGR555 # -DLOG_TO_FILE +LDLIBS += -lpatches -lgskit -ldmakit -lps2_drivers +OBJS += platform/ps2/plat.o +OBJS += platform/ps2/emu.o +OBJS += platform/ps2/in_ps2.o +USE_FRONTEND = 1 endif ifeq "$(PLATFORM)" "libretro" -OBJS += platform/libretro.o +OBJS += platform/libretro/libretro.o +ifneq ($(STATIC_LINKING), 1) +CFLAGS += -DHAVE_ZLIB +OBJS += platform/libretro/libretro-common/formats/png/rpng.o +OBJS += platform/libretro/libretro-common/streams/trans_stream.o +OBJS += platform/libretro/libretro-common/streams/trans_stream_pipe.o +OBJS += platform/libretro/libretro-common/streams/trans_stream_zlib.o +OBJS += platform/libretro/libretro-common/file/file_path_io.o +OBJS += platform/libretro/libretro-common/file/file_path.o +OBJS += platform/libretro/libretro-common/vfs/vfs_implementation.o +OBJS += platform/libretro/libretro-common/time/rtime.o +OBJS += platform/libretro/libretro-common/string/stdstring.o +OBJS += platform/libretro/libretro-common/compat/compat_strcasestr.o +OBJS += platform/libretro/libretro-common/encodings/encoding_utf.o +OBJS += platform/libretro/libretro-common/compat/compat_strl.o +ifeq 
"$(USE_LIBRETRO_VFS)" "1" +OBJS += platform/libretro/libretro-common/compat/compat_posix_string.o +OBJS += platform/libretro/libretro-common/compat/fopen_utf8.o +OBJS += platform/libretro/libretro-common/streams/file_stream.o +OBJS += platform/libretro/libretro-common/streams/file_stream_transforms.o +endif +endif +ifeq "$(USE_LIBRETRO_VFS)" "1" +OBJS += platform/libretro/libretro-common/memmap/memmap.o +endif endif ifeq "$(USE_FRONTEND)" "1" # common -OBJS += platform/common/main.o platform/common/emu.o \ +OBJS += platform/common/main.o platform/common/emu.o platform/common/upscale.o \ platform/common/menu_pico.o platform/common/config_file.o # libpicofe OBJS += platform/libpicofe/input.o platform/libpicofe/readpng.o \ - platform/libpicofe/fonts.o platform/libpicofe/linux/in_evdev.o \ - platform/libpicofe/linux/plat.o + platform/libpicofe/fonts.o +ifneq (,$(filter %HAVE_GLES, $(CFLAGS))) +OBJS += platform/libpicofe/gl.o platform/libpicofe/gl_platform.o +endif # libpicofe - sound OBJS += platform/libpicofe/sndout.o @@ -126,19 +309,62 @@ endif endif # USE_FRONTEND -OBJS += platform/common/mp3.o +ifneq "$(PLATFORM)" "psp" +OBJS += platform/common/mp3.o platform/common/mp3_sync.o ifeq "$(PLATFORM_MP3)" "1" +OBJS += platform/common/mp3_helix.o else ifeq "$(HAVE_LIBAVCODEC)" "1" OBJS += platform/common/mp3_libavcodec.o else -OBJS += platform/common/mp3_dummy.o +#OBJS += platform/common/mp3_minimp3.o +OBJS += platform/common/mp3_drmp3.o +endif endif +ifeq (1,$(use_libchdr)) +CFLAGS += -DUSE_LIBCHDR + +# chdr +CHDR = pico/cd/libchdr +CHDR_OBJS += $(CHDR)/src/libchdr_chd.o $(CHDR)/src/libchdr_cdrom.o +CHDR_OBJS += $(CHDR)/src/libchdr_flac.o +CHDR_OBJS += $(CHDR)/src/libchdr_bitstream.o $(CHDR)/src/libchdr_huffman.o + +LZMA = $(CHDR)/deps/lzma-24.05 +LZMA_OBJS += $(LZMA)/src/CpuArch.o $(LZMA)/src/Alloc.o $(LZMA)/src/LzmaEnc.o +LZMA_OBJS += $(LZMA)/src/Sort.o $(LZMA)/src/LzmaDec.o $(LZMA)/src/LzFind.o +LZMA_OBJS += $(LZMA)/src/Delta.o +$(LZMA_OBJS): CFLAGS += -DZ7_ST 
-Wno-unused + +ZSTD = $(CHDR)/deps/zstd-1.5.6/lib +ZSTD_OBJS += $(ZSTD)/common/entropy_common.o $(ZSTD)/common/error_private.o +ZSTD_OBJS += $(ZSTD)/common/fse_decompress.o $(ZSTD)/common/xxhash.o +ZSTD_OBJS += $(ZSTD)/common/zstd_common.o +ZSTD_OBJS += $(ZSTD)/decompress/huf_decompress.o +ifneq (,$(filter x86%, $(ARCH))) +ZSTD_OBJS += $(ZSTD)/decompress/huf_decompress_amd64.o +endif +ZSTD_OBJS += $(ZSTD)/decompress/zstd_ddict.o +ZSTD_OBJS += $(ZSTD)/decompress/zstd_decompress_block.o +ZSTD_OBJS += $(ZSTD)/decompress/zstd_decompress.o +$(ZSTD_OBJS) $(CHDR_OBJS): CFLAGS += -I$(ZSTD) -Wno-unused + +OBJS += $(CHDR_OBJS) $(ZSTD_OBJS) +ifneq ($(STATIC_LINKING), 1) +OBJS += $(LZMA_OBJS) +endif +# ouf... prepend includes to overload headers available in the toolchain +CFLAGS := -I$(LZMA)/include -I$(CHDR)/include $(CFLAGS) +endif + +ifeq "$(PLATFORM_ZLIB)" "1" # zlib OBJS += zlib/gzio.o zlib/inffast.o zlib/inflate.o zlib/inftrees.o zlib/trees.o \ zlib/deflate.o zlib/crc32.o zlib/adler32.o zlib/zutil.o zlib/compress.o zlib/uncompr.o +CFLAGS += -Izlib +endif # unzip -OBJS += unzip/unzip.o unzip/unzip_stream.o +OBJS += unzip/unzip.o include platform/common/common.mak @@ -146,28 +372,66 @@ include platform/common/common.mak OBJS += $(OBJS_COMMON) CFLAGS += $(addprefix -D,$(DEFINES)) -ifneq ($(findstring gcc,$(CC)),) -LDFLAGS += -Wl,-Map=$(TARGET).map +ifneq (,$(findstring sdl,$(OBJS))) +CFLAGS += -DUSE_SDL endif +ifneq ($(findstring gcc,$(CC)),) +ifneq ($(findstring SunOS,$(shell uname -a)),SunOS) +ifneq ($(findstring clang,$(shell $(CC) -v 2>&1)),) +LDFLAGS += -Wl,-map,$(TARGET).map +else +LDFLAGS += -Wl,-Map=$(TARGET).map +endif +endif +endif target_: $(TARGET) clean: - $(RM) $(TARGET) $(OBJS) + $(RM) $(TARGET) $(OBJS) pico/pico_int_offs.h + $(MAKE) -C cpu/cyclone clean + $(MAKE) -C cpu/musashi clean + $(MAKE) -C tools clean + $(RM) -r .od_data $(TARGET): $(OBJS) - $(CC) -o $@ $(CFLAGS) $^ $(LDFLAGS) $(LDLIBS) + +ifeq ($(STATIC_LINKING_LINK), 1) + $(AR) rcs $@ $^ +else + 
$(LD) $(LINKOUT)$@ $^ $(CFLAGS) $(LDFLAGS) $(LDLIBS) +endif + +ifeq "$(PLATFORM)" "psp" +PSPSDK ?= $(shell psp-config --pspsdk-path) +TARGET = PicoDrive +PSP_EBOOT_TITLE = Sonic UGC +PSP_EBOOT_ICON = platform/psp/data/icon.png +PSP_EBOOT_PIC1 = platform/psp/skin/background_selector.png +PSP_EBOOT_SND0 = platform/psp/data/SND0.AT3 + +LIBS += -lpng -lm -lz -lpspgu -lpsppower -lpspaudio -lpsprtc -lpspaudiocodec +EXTRA_TARGETS = EBOOT.PBP +include $(PSPSDK)/lib/build.mak +# TODO image generation +endif pprof: platform/linux/pprof.c - $(CC) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. $^ -o $@ + $(CC) $(CFLAGS) -O2 -ggdb -DPPROF -DPPROF_TOOL -I../../ -I. $^ -o $@ $(LDFLAGS) $(LDLIBS) -tools/textfilter: tools/textfilter.c - make -C tools/ textfilter +pico/pico_int_offs.h: tools/mkoffsets.sh + make -C tools/ XCC="$(CC)" XCFLAGS="$(CFLAGS) -UUSE_LIBRETRO_VFS" XPLATFORM="$(platform)" + +%.o: %.c + $(CC) -c $(OBJOUT)$@ $< $(CFLAGS) .s.o: $(CC) $(CFLAGS) -c $< -o $@ +.S.o: + $(CC) $(CFLAGS) -c $< -o $@ + # special flags - perhaps fix this someday instead? pico/draw.o: CFLAGS += -fno-strict-aliasing pico/draw2.o: CFLAGS += -fno-strict-aliasing @@ -177,11 +441,47 @@ pico/cd/cd_file.o: CFLAGS += -fno-strict-aliasing pico/cd/pcm.o: CFLAGS += -fno-strict-aliasing pico/cd/LC89510.o: CFLAGS += -fno-strict-aliasing pico/cd/gfx_cd.o: CFLAGS += -fno-strict-aliasing +ifeq (1,$(use_sh2drc)) +ifneq (,$(findstring -flto,$(CFLAGS))) +# if using the DRC, memory and sh2soc directly use the DRC register for SH2 SR +# to avoid saving and reloading it. However, this collides with the use of LTO. 
+pico/32x/memory.o: CFLAGS += -fno-lto +pico/32x/sh2soc.o: CFLAGS += -fno-lto +cpu/sh2/compiler.o: CFLAGS += -fno-lto +endif +ifneq (,$(filter mips64%, $(ARCH))$(filter %mips32r2, $(CFLAGS))) +CFLAGS += -DMIPS_USE_SYNCI # mips32r2 clear_cache uses SYNCI instead of a syscall +endif +endif -# random deps -pico/carthw/svp/compiler.o : cpu/drc/emit_$(ARCH).c -cpu/sh2/compiler.o : cpu/drc/emit_$(ARCH).c +# fame needs ~2GB of RAM to compile on gcc 4.8 +# on x86, this is reduced by ~300MB when debug info is off (but not on ARM) +# not using O3 and -fno-expensive-optimizations seems to also help, but you may +# want to remove this stuff for better performance if your compiler can handle it +ifeq "$(DEBUG)" "0" +ifeq (,$(findstring msvc,$(platform))) +cpu/fame/famec.o: CFLAGS += -g0 -O2 -fno-expensive-optimizations +else +cpu/fame/famec.o: CFLAGS += -Od +endif +endif + +tools/make_carthw_c: + make -C tools make_carthw_c +pico/carthw_cfg.c: pico/carthw.cfg tools/make_carthw_c + tools/make_carthw_c $< $@ + +# preprocessed asm files most probably include the offsets file +$(filter %.S,$(SRCS_COMMON)): pico/pico_int_offs.h + +# random deps - TODO remove this and compute dependencies automatically +pico/carthw/svp/compiler.o : cpu/drc/emit_arm.c +cpu/sh2/compiler.o : cpu/drc/emit_arm.c cpu/drc/emit_arm64.c cpu/drc/emit_ppc.c +cpu/sh2/compiler.o : cpu/drc/emit_x86.c cpu/drc/emit_mips.c cpu/drc/emit_riscv.c cpu/sh2/mame/sh2pico.o : cpu/sh2/mame/sh2.c -pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c pico/pico_int.h -pico/memory.o pico/cd/memory.o pico/32x/memory.o : pico/pico_int.h pico/memory.h +pico/pico.o pico/cd/mcd.o pico/32x/32x.o : pico/pico_cmn.c +pico/memory.o pico/cd/memory.o pico/32x/memory.o : pico/memory.h +$(shell grep -rl pico_int.h pico) : pico/pico_int.h +# pico/cart.o : pico/carthw_cfg.c cpu/fame/famec.o: cpu/fame/famec.c cpu/fame/famec_opcodes.h +platform/common/menu_pico.o: platform/libpicofe/menu.c diff --git a/Makefile.libretro b/Makefile.libretro
index 87360482..578b10e8 100644 --- a/Makefile.libretro +++ b/Makefile.libretro @@ -1,16 +1,23 @@ # Makefile for PicoDrive (libretro) +SPACE := +SPACE := $(SPACE) $(SPACE) +BACKSLASH := +BACKSLASH := \$(BACKSLASH) +filter_out1 = $(filter-out $(firstword $1),$1) +filter_out2 = $(call filter_out1,$(call filter_out1,$1)) + ifeq ($(platform),) -platform = unix -ifeq ($(shell uname -a),) - platform = win -else ifneq ($(findstring MINGW,$(shell uname -a)),) - platform = win -else ifneq ($(findstring Darwin,$(shell uname -a)),) - platform = osx -else ifneq ($(findstring win,$(shell uname -a)),) - platform = win -endif + platform = unix + ifeq ($(shell uname -a),) + platform = win + else ifneq ($(findstring MINGW,$(shell uname -a)),) + platform = win + else ifneq ($(findstring Darwin,$(shell uname -a)),) + platform = osx + else ifneq ($(findstring win,$(shell uname -a)),) + platform = win + endif endif CC ?= gcc @@ -19,136 +26,687 @@ AS ?= as CC_AS ?= $(CC) CFLAGS ?= +#libretro includes +CFLAGS += -I platform/libretro/libretro-common/include +CFLAGS += -I platform/libretro/libretro-common/include/compat +CFLAGS += -I platform/libretro/libretro-common/include/encodings +CFLAGS += -I platform/libretro/libretro-common/include/formats +CFLAGS += -I platform/libretro/libretro-common/include/streams +CFLAGS += -I platform/libretro/libretro-common/include/string +CFLAGS += -I platform/libretro/libretro-common/include/vfs + +USE_LIBRETRO_VFS := 1 +STATIC_LINKING:= 0 +STATIC_LINKING_LINK:= 0 +LOW_MEMORY := 0 TARGET_NAME := picodrive +LIBM := -lm +GIT_REVISION ?= -$(shell git rev-parse --short HEAD || echo ???) 
+CFLAGS += -DREVISION=\"$(GIT_REVISION)\" + +fpic := + +ifeq ($(STATIC_LINKING),1) +EXT=a +endif + +# Unix +ifeq ($(platform), unix) + EXT ?= so + TARGET := $(TARGET_NAME)_libretro.$(EXT) + fpic := -fPIC + SHARED := -shared + CFLAGS += -DFAMEC_NO_GOTOS +ifneq ($(findstring SunOS,$(shell uname -a)),) + CC=gcc +endif + +# x86/x86_64 generic +else ifneq (,$(findstring x86,$(platform))) + TARGET := $(TARGET_NAME)_libretro.so + ARCH := x86 + fpic := -fPIC + SHARED := -shared + CFLAGS += -DFAMEC_NO_GOTOS + +# AARCH64 generic +else ifeq ($(platform), aarch64) + TARGET := $(TARGET_NAME)_libretro.so + ARCH = aarch64 + fpic := -fPIC + SHARED := -shared + CFLAGS += -DFAMEC_NO_GOTOS + +# Portable Linux +else ifeq ($(platform), linux-portable) + EXT ?= so + TARGET := $(TARGET_NAME)_libretro.$(EXT) + SHARED := -shared -nostdlib + fpic := -fPIC + LIBM := + CFLAGS += -DFAMEC_NO_GOTOS + +# OS X +else ifeq ($(platform), osx) + EXT ?= dylib + TARGET := $(TARGET_NAME)_libretro.$(EXT) + SHARED := -dynamiclib + fpic := -fPIC + APPLE := 1 + + ifeq ($(CROSS_COMPILE),1) + TARGET_RULE = -target $(LIBRETRO_APPLE_PLATFORM) -isysroot $(LIBRETRO_APPLE_ISYSROOT) + CFLAGS += $(TARGET_RULE) + CPPFLAGS += $(TARGET_RULE) + CXXFLAGS += $(TARGET_RULE) + LDFLAGS += $(TARGET_RULE) + endif + + ifndef ($(NOUNIVERSAL)) + CFLAGS += $(ARCHFLAGS) + LDFLAGS += $(ARCHFLAGS) + endif + CFLAGS += -DUINT8=uint8_t -DUINT16=uint16_t -DUINT32=uint32_t -DINT8=int8_t -DINT16=int16_t -DINT32=int32_t + +else ifeq ($(platform), staticios) + TARGET := $(TARGET_NAME)_libretro_ios.a + APPLE := 1 + ifeq ($(IOSSDK),) + IOSSDK := $(shell xcodebuild -version -sdk iphoneos Path) + endif + CC = clang -arch armv7 -arch arm64 -isysroot $(IOSSDK) + CXX = clang++ -arch armv7 -arch arm64 -isysroot $(IOSSDK) + CC_AS = perl ./tools/gas-preprocessor.pl $(CC) + CFLAGS += -marm + CFLAGS += -DIOS + + CC += -miphoneos-version-min=8.0 + CXX += -miphoneos-version-min=8.0 + CC_AS += -miphoneos-version-min=8.0 + CFLAGS += 
-miphoneos-version-min=8.0 + + STATIC_LINKING = 1 + STATIC_LINKING_LINK = 1 + +# iOS +else ifneq (,$(findstring ios,$(platform))) + TARGET := $(TARGET_NAME)_libretro_ios.dylib + SHARED := -dynamiclib + fpic := -fPIC + APPLE := 1 + MINVERSION := + ifeq ($(IOSSDK),) + IOSSDK := $(shell xcodebuild -version -sdk iphoneos Path) + endif + ifeq ($(platform),ios-arm64) + CC = clang -arch arm64 -isysroot $(IOSSDK) + CXX = clang++ -arch arm64 -isysroot $(IOSSDK) + CFLAGS += -marm -DARM -D__aarch64__=1 + else + CC = clang -arch armv7 -isysroot $(IOSSDK) + CXX = clang++ -arch armv7 -isysroot $(IOSSDK) + CC_AS = perl ./tools/gas-preprocessor.pl $(CC) + CFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -marm + ASFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon + NO_ARM_ASM = 1 + endif + CFLAGS += -DIOS + +ifeq ($(platform),$(filter $(platform),ios9 ios-arm64)) + MINVERSION = -miphoneos-version-min=8.0 +else + MINVERSION = -miphoneos-version-min=5.0 +endif + CC += $(MINVERSION) + CXX += $(MINVERSION) + CC_AS += $(MINVERSION) + CFLAGS += $(MINVERSION) + +# tvOS +else ifeq ($(platform), tvos-arm64) + TARGET := $(TARGET_NAME)_libretro_tvos.dylib + SHARED := -dynamiclib + fpic := -fPIC + APPLE := 1 + ifeq ($(IOSSDK),) + IOSSDK := $(shell xcodebuild -version -sdk appletvos Path) + endif + CC_AS = perl ./tools/gas-preprocessor.pl $(CC) + CC = cc -arch arm64 -isysroot $(IOSSDK) + CXX = c++ -arch arm64 -isysroot $(IOSSDK) + CFLAGS += -marm -DARM -D__aarch64__=1 + CFLAGS += -DIOS + +# Lightweight PS3 Homebrew SDK +else ifneq (,$(filter $(platform), ps3 psl1ght)) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(PS3DEV)/ppu/bin/ppu-$(COMMONLV)gcc$(EXE_EXT) + AR = $(PS3DEV)/ppu/bin/ppu-$(COMMONLV)ar$(EXE_EXT) + CFLAGS += -DFAMEC_NO_GOTOS -D__PS3__ + STATIC_LINKING = 1 + STATIC_LINKING_LINK = 1 + ifeq ($(platform), psl1ght) + FLAGS += -D__PSL1GHT__ + endif + +# PSP +else ifeq ($(platform), psp1) + ARCH = mipsel + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = 
psp-gcc$(EXE_EXT) + AR = psp-ar$(EXE_EXT) + CFLAGS += -DPSP -G0 -ftracer + CFLAGS += -I$(shell psp-config --pspsdk-path)/include + STATIC_LINKING = 1 + STATIC_LINKING_LINK = 1 + +# PS2 +else ifeq ($(platform), ps2) + ARCH = mipsel + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = mips64r5900el-ps2-elf-gcc$(EXE_EXT) + AR = mips64r5900el-ps2-elf-ar$(EXE_EXT) + CFLAGS += -Wall -DPS2 -D_EE -DUSE_BGR555 -DFAMEC_NO_GOTOS -DRENDER_GSKIT_PS2 -fsingle-precision-constant + CFLAGS += -I$(PS2DEV)/gsKit/include -I$(PS2SDK)/ee/include -I$(PS2SDK)/common/include + STATIC_LINKING = 1 + STATIC_LINKING_LINK = 1 + +# CTR (3DS) +else ifeq ($(platform), ctr) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(DEVKITARM)/bin/arm-none-eabi-gcc$(EXE_EXT) + CXX = $(DEVKITARM)/bin/arm-none-eabi-g++$(EXE_EXT) + AR = $(DEVKITARM)/bin/arm-none-eabi-ar$(EXE_EXT) + CFLAGS += -DARM11 -D_3DS + CFLAGS += -march=armv6k -mtune=mpcore -mfloat-abi=hard -marm -mfpu=vfp + CFLAGS += -Wall -mword-relocations + CFLAGS += -fomit-frame-pointer -ffast-math + STATIC_LINKING = 1 + STATIC_LINKING_LINK = 1 + + OBJS += platform/libretro/3ds/3ds_utils.o platform/libretro/3ds/utils.o + +# Raspberry Pi +else ifneq (,$(findstring rpi,$(platform))) + CFLAGS += -Wall -mword-relocations + CFLAGS += -fomit-frame-pointer -ffast-math + + TARGET := $(TARGET_NAME)_libretro.so + SHARED := -shared + fpic := -fPIC + + ifneq (,$(findstring rpi1,$(platform))) + CFLAGS += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6j + else ifneq (,$(findstring rpi2,$(platform))) + CFLAGS += -marm -mcpu=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard + else ifneq (,$(findstring rpi3,$(platform))) + CFLAGS += -marm -mcpu=cortex-a53 -mfpu=neon-fp-armv8 -mfloat-abi=hard + endif + +# Vita +else ifeq ($(platform), vita) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = arm-vita-eabi-gcc$(EXE_EXT) + AR = arm-vita-eabi-ar$(EXE_EXT) + CFLAGS += -DVITA + CFLAGS += -marm -mfpu=neon -mcpu=cortex-a9 -march=armv7-a -mfloat-abi=hard -ffast-math 
+ CFLAGS += -fno-asynchronous-unwind-tables -ftree-vectorize -funroll-loops + CFLAGS += -mword-relocations -fno-unwind-tables + CFLAGS += -fno-optimize-sibling-calls + STATIC_LINKING = 1 + STATIC_LINKING_LINK = 1 + +# Xbox 360 +else ifeq ($(platform), xenon) + TARGET := $(TARGET_NAME)_libretro_xenon360.a + CC = xenon-gcc$(EXE_EXT) + AR = xenon-ar$(EXE_EXT) + CFLAGS += -D__LIBXENON__ -m32 + +# Nintendo Game Cube +else ifeq ($(platform), ngc) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) + AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) + CFLAGS += -DGEKKO -DHW_DOL -mrvl -mcpu=750 -meabi -mhard-float + STATIC_LINKING = 1 + STATIC_LINKING_LINK = 1 + +# Nintendo Wii +else ifeq ($(platform), wii) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) + AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) + CFLAGS += -DGEKKO -DHW_RVL -mrvl -mcpu=750 -meabi -mhard-float -ffat-lto-objects + STATIC_LINKING = 1 + STATIC_LINKING_LINK = 1 + +# Nintendo Wii U +else ifeq ($(platform), wiiu) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) + CXX = $(DEVKITPPC)/bin/powerpc-eabi-g++$(EXE_EXT) + AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) + CFLAGS += -DGEKKO -DWIIU -DHW_RVL -DHW_WUP -mwup -mcpu=750 -meabi -mhard-float + STATIC_LINKING = 1 + STATIC_LINKING_LINK = 1 + +# Nintendo Switch (libtransistor) +else ifeq ($(platform), switch) + TARGET := $(TARGET_NAME)_libretro_$(platform).a + include $(LIBTRANSISTOR_HOME)/libtransistor.mk + STATIC_LINKING=1 + STATIC_LINKING_LINK = 1 + +# Nintendo Switch (libnx) +else ifeq ($(platform), libnx) + include $(DEVKITPRO)/libnx/switch_rules + TARGET := $(TARGET_NAME)_libretro_$(platform).a + CFLAGS += -O3 -fomit-frame-pointer -ffast-math -I$(DEVKITPRO)/libnx/include/ -fPIE -Wl,--allow-multiple-definition + CFLAGS += -specs=$(DEVKITPRO)/libnx/switch.specs + CFLAGS += -D__SWITCH__ -DHAVE_LIBNX + 
CFLAGS += -DARM -D__aarch64__=1 -march=armv8-a -mtune=cortex-a57 -mtp=soft -ffast-math -mcpu=cortex-a57+crc+fp+simd -ffunction-sections + CFLAGS += -Ifrontend/switch -ftree-vectorize + STATIC_LINKING=1 + STATIC_LINKING_LINK = 1 + +# QNX +else ifeq ($(platform), qnx) + ARCH = arm + TARGET := $(TARGET_NAME)_libretro_$(platform).so + fpic := -fPIC + CC = qcc -Vgcc_ntoarmv7le + CC_AS = $(CC) + CFLAGS += -DBASE_ADDR_FIXED=0 -D__BLACKBERRY_QNX__ -marm -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp + ASFLAGS += -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=softfp + +# (armv7 a7, hard point, neon based) ### +# NESC, SNESC, C64 mini +else ifeq ($(platform), classic_armv7_a7) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + SHARED := -shared -Wl,--no-undefined,-Bsymbolic + CFLAGS += -Ofast \ + -flto=4 -fuse-linker-plugin \ + -fdata-sections -ffunction-sections -Wl,--gc-sections \ + -fno-stack-protector -fno-ident -fomit-frame-pointer \ + -falign-functions=1 -falign-jumps=1 -falign-loops=1 \ + -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops \ + -fmerge-all-constants -fno-math-errno \ + -marm -mtune=cortex-a7 -mfpu=neon-vfpv4 -mfloat-abi=hard + CXXFLAGS += $(CFLAGS) + CPPFLAGS += $(CFLAGS) + ASFLAGS += $(CFLAGS) + HAVE_NEON = 1 + BUILTIN_GPU = neon + ifeq ($(shell echo `$(CC) -dumpversion` "< 4.9" | bc -l), 1) + CFLAGS += -march=armv7-a + else + CFLAGS += -march=armv7ve + # If gcc is 5.0 or later + ifeq ($(shell echo `$(CC) -dumpversion` ">= 5" | bc -l), 1) + LDFLAGS += -static-libgcc -static-libstdc++ + endif + endif + +# (armv8 a35, hard point, neon based) ### +# Playstation Classic +else ifeq ($(platform), classic_armv8_a35) + TARGET := $(TARGET_NAME)_libretro.so + fpic := -fPIC + SHARED := -shared -Wl,--no-undefined,-Bsymbolic + CFLAGS += -Ofast \ + -flto -fuse-linker-plugin \ + -fdata-sections -ffunction-sections -Wl,--gc-sections \ + -fno-stack-protector -fno-ident -fomit-frame-pointer \ + -falign-functions=1 -falign-jumps=1 
-falign-loops=1 \ + -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-unroll-loops \ + -fmerge-all-constants -fno-math-errno -fno-strict-aliasing \ + -marm -mtune=cortex-a35 -mfpu=neon-fp-armv8 -mfloat-abi=hard + CXXFLAGS += $(CFLAGS) + CPPFLAGS += $(CFLAGS) + ASFLAGS += $(CFLAGS) + HAVE_NEON = 1 + BUILTIN_GPU = neon + CFLAGS += -march=armv8-a + LDFLAGS += -static-libgcc -static-libstdc++ + +####################################### + +# ARM-64 +else ifeq ($(platform), arm64) + EXT ?= so + TARGET := $(TARGET_NAME)_libretro.$(EXT) + ARCH = aarch64 + fpic := -fPIC + SHARED := -shared + CFLAGS += -DFAMEC_NO_GOTOS + +# AARCH64 generic +else ifeq ($(platform), aarch64) + TARGET := $(TARGET_NAME)_libretro.so + ARCH = aarch64 + fpic := -fPIC + SHARED := -shared + CFLAGS += -DFAMEC_NO_GOTOS + +# ARM +else ifneq (,$(findstring armv,$(platform))) + TARGET := $(TARGET_NAME)_libretro.so + SHARED := -shared -Wl,--no-undefined,-Bsymbolic + fpic := -fPIC + ifneq (,$(findstring cortexa5,$(platform))) + CFLAGS += -marm -mcpu=cortex-a5 + ASFLAGS += -mcpu=cortex-a5 + else ifneq (,$(findstring cortexa8,$(platform))) + CFLAGS += -marm -mcpu=cortex-a8 + ASFLAGS += -mcpu=cortex-a8 + else ifneq (,$(findstring cortexa9,$(platform))) + CFLAGS += -marm -mcpu=cortex-a9 + ASFLAGS += -mcpu=cortex-a9 + else ifneq (,$(findstring cortexa15a7,$(platform))) + CFLAGS += -marm -mcpu=cortex-a15.cortex-a7 + ASFLAGS += -mcpu=cortex-a15.cortex-a7 + else + CFLAGS += -marm + endif + ifneq (,$(findstring neon,$(platform))) + CFLAGS += -mfpu=neon + ASFLAGS += -mfpu=neon + endif + ifneq (,$(findstring softfloat,$(platform))) + CFLAGS += -mfloat-abi=softfp + ASFLAGS += -mfloat-abi=softfp + else ifneq (,$(findstring hardfloat,$(platform))) + CFLAGS += -mfloat-abi=hard + ASFLAGS += -mfloat-abi=hard + endif + ifeq (,$(findstring armasm,$(platform))) + NO_ARM_ASM = 1 + endif + +# Emscripten +else ifeq ($(platform), emscripten) + TARGET := $(TARGET_NAME)_libretro_$(platform).bc + ARCH = unknown + + 
STATIC_LINKING = 1 + +# RS90 +else ifeq ($(platform), rs90) + TARGET := $(TARGET_NAME)_libretro.so +ifeq (,$(shell command -v $(RS90_PREFIX)mipsel-rs90-linux-uclibc-gcc 2>/dev/null)) + # locate the toolchain for buildbot if it isn't in path or prefix not set + RS90_PREFIX = /opt/rs90-toolchain/usr/bin/ +endif + CC = $(RS90_PREFIX)mipsel-linux-gcc + AR = $(RS90_PREFIX)mipsel-linux-ar + SHARED := -shared -nostdlib + fpic := -fPIC + LIBM := + CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32 -D__GCW0__ + # clear_cache uses SYNCI instead of a syscall + CFLAGS += -DMIPS_USE_SYNCI + LOW_MEMORY = 1 + +# GCW0 +else ifeq ($(platform), gcw0) + TARGET := $(TARGET_NAME)_libretro.so +ifeq (,$(shell command -v $(GCW0_PREFIX)mipsel-gcw0-linux-uclibc-gcc 2>/dev/null)) + # locate the toolchain for buildbot if it isn't in path or prefix not set + GCW0_PREFIX = /opt/gcw0-toolchain/usr/bin/ +endif + CC = $(GCW0_PREFIX)mipsel-linux-gcc + AR = $(GCW0_PREFIX)mipsel-linux-ar + SHARED := -shared -nostdlib + fpic := -fPIC + LIBM := + CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float -D__GCW0__ + # clear_cache uses SYNCI instead of a syscall + CFLAGS += -DMIPS_USE_SYNCI + +# RETROFW +else ifeq ($(platform), retrofw) + TARGET := $(TARGET_NAME)_libretro.so +ifeq (,$(shell command -v $(GCW0_PREFIX)mipsel-gcw0-linux-uclibc-gcc 2>/dev/null)) + # locate the toolchain for buildbot if it isn't in path or prefix not set + GCW0_PREFIX = /opt/retrofw-toolchain/usr/bin/ +endif + CC = $(GCW0_PREFIX)mipsel-linux-gcc + AR = $(GCW0_PREFIX)mipsel-linux-ar + SHARED := -shared -nostdlib + fpic := -fPIC + LIBM := + CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32 -mhard-float -D__GCW0__ + # clear_cache uses SYNCI instead of a syscall + CFLAGS += -DMIPS_USE_SYNCI + +# MIYOO +else ifeq ($(platform), miyoo) + TARGET := $(TARGET_NAME)_libretro.so + CC = /opt/miyoo/usr/bin/arm-linux-gcc + AR = /opt/miyoo/usr/bin/arm-linux-ar + SHARED := 
-shared -nostdlib + fpic := -fPIC + LIBM := + CFLAGS += -fomit-frame-pointer -ffast-math -march=armv5te -mtune=arm926ej-s -D__GCW0__ + HAVE_ARMv6 = 0 + LOW_MEMORY = 1 + +# Windows MSVC 2017 all architectures +else ifneq (,$(findstring windows_msvc2017,$(platform))) + NO_GCC := 1 + + PlatformSuffix = $(subst windows_msvc2017_,,$(platform)) + ifneq (,$(findstring desktop,$(PlatformSuffix))) + WinPartition = desktop + MSVC2017CompileFlags = -DWINAPI_FAMILY=WINAPI_FAMILY_DESKTOP_APP -FS + LDFLAGS += -MANIFEST -LTCG:incremental -NXCOMPAT -DYNAMICBASE -DEBUG -OPT:REF -INCREMENTAL:NO -SUBSYSTEM:WINDOWS -MANIFESTUAC:"level='asInvoker' uiAccess='false'" -OPT:ICF -ERRORREPORT:PROMPT -NOLOGO -TLBID:1 + LDLIBS += kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib + else ifneq (,$(findstring uwp,$(PlatformSuffix))) + WinPartition = uwp + MSVC2017CompileFlags = -DWINAPI_FAMILY=WINAPI_FAMILY_APP -D_WINDLL -D_UNICODE -DUNICODE -D__WRL_NO_DEFAULT_LIB__ -EHsc -FS + LDFLAGS += -APPCONTAINER -NXCOMPAT -DYNAMICBASE -MANIFEST:NO -LTCG -OPT:REF -SUBSYSTEM:CONSOLE -MANIFESTUAC:NO -OPT:ICF -ERRORREPORT:PROMPT -NOLOGO -TLBID:1 -DEBUG:FULL -WINMD:NO + LDLIBS += WindowsApp.lib + endif + + ARCH = x86_64 + SHARED := + LIBM := + NO_ALIGN_FUNCTIONS = 1 + + CFLAGS += -DHAVE_VSNPRINTF + CFLAGS += $(MSVC2017CompileFlags) + CXXFLAGS += $(MSVC2017CompileFlags) + + TargetArchMoniker = $(subst $(WinPartition)_,,$(PlatformSuffix)) + + CC = cl.exe + CXX = cl.exe + LD = link.exe + + reg_query = $(call filter_out2,$(subst $2,,$(shell reg query "$2" -v "$1" 2>nul))) + fix_path = $(subst $(SPACE),\ ,$(subst \,/,$1)) + + ProgramFiles86w := $(shell cmd //c "echo %PROGRAMFILES(x86)%") + ProgramFiles86 := $(shell cygpath "$(ProgramFiles86w)") + + WindowsSdkDir ?= $(call reg_query,InstallationFolder,HKEY_LOCAL_MACHINE\SOFTWARE\Wow6432Node\Microsoft\Microsoft SDKs\Windows\v10.0) + WindowsSdkDir ?= $(call reg_query,InstallationFolder,HKEY_CURRENT_USER\SOFTWARE\Wow6432Node\Microsoft\Microsoft 
SDKs\Windows\v10.0) + WindowsSdkDir ?= $(call reg_query,InstallationFolder,HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0) + WindowsSdkDir ?= $(call reg_query,InstallationFolder,HKEY_CURRENT_USER\SOFTWARE\Microsoft\Microsoft SDKs\Windows\v10.0) + WindowsSdkDir := $(WindowsSdkDir) + + WindowsSDKVersion ?= $(firstword $(foreach folder,$(subst $(subst \,/,$(WindowsSdkDir)Include/),,$(wildcard $(call fix_path,$(WindowsSdkDir)Include\*))),$(if $(wildcard $(call fix_path,$(WindowsSdkDir)Include/$(folder)/um/Windows.h)),$(folder),)))$(BACKSLASH) + WindowsSDKVersion := $(WindowsSDKVersion) + + VsInstallBuildTools = $(ProgramFiles86)/Microsoft Visual Studio/2017/BuildTools + VsInstallEnterprise = $(ProgramFiles86)/Microsoft Visual Studio/2017/Enterprise + VsInstallProfessional = $(ProgramFiles86)/Microsoft Visual Studio/2017/Professional + VsInstallCommunity = $(ProgramFiles86)/Microsoft Visual Studio/2017/Community + + VsInstallRoot ?= $(shell if [ -d "$(VsInstallBuildTools)" ]; then echo "$(VsInstallBuildTools)"; fi) + ifeq ($(VsInstallRoot), ) + VsInstallRoot = $(shell if [ -d "$(VsInstallEnterprise)" ]; then echo "$(VsInstallEnterprise)"; fi) + endif + ifeq ($(VsInstallRoot), ) + VsInstallRoot = $(shell if [ -d "$(VsInstallProfessional)" ]; then echo "$(VsInstallProfessional)"; fi) + endif + ifeq ($(VsInstallRoot), ) + VsInstallRoot = $(shell if [ -d "$(VsInstallCommunity)" ]; then echo "$(VsInstallCommunity)"; fi) + endif + VsInstallRoot := $(VsInstallRoot) + + VcCompilerToolsVer := $(shell cat "$(VsInstallRoot)/VC/Auxiliary/Build/Microsoft.VCToolsVersion.default.txt" | grep -o '[0-9\.]*') + VcCompilerToolsDir := $(VsInstallRoot)/VC/Tools/MSVC/$(VcCompilerToolsVer) + + WindowsSDKSharedIncludeDir := $(shell cygpath -w "$(WindowsSdkDir)\Include\$(WindowsSDKVersion)\shared") + WindowsSDKUCRTIncludeDir := $(shell cygpath -w "$(WindowsSdkDir)\Include\$(WindowsSDKVersion)\ucrt") + WindowsSDKUMIncludeDir := $(shell cygpath -w 
"$(WindowsSdkDir)\Include\$(WindowsSDKVersion)\um") + WindowsSDKUCRTLibDir := $(shell cygpath -w "$(WindowsSdkDir)\Lib\$(WindowsSDKVersion)\ucrt\$(TargetArchMoniker)") + WindowsSDKUMLibDir := $(shell cygpath -w "$(WindowsSdkDir)\Lib\$(WindowsSDKVersion)\um\$(TargetArchMoniker)") + + # For some reason the HostX86 compiler doesn't like compiling for x64 + # ("no such file" opening a shared library), and vice-versa. + # Work around it for now by using the strictly x86 compiler for x86, and x64 for x64. + # NOTE: What about ARM? + ifneq (,$(findstring x64,$(TargetArchMoniker))) + VCCompilerToolsBinDir := $(VcCompilerToolsDir)\bin\HostX64 + else + VCCompilerToolsBinDir := $(VcCompilerToolsDir)\bin\HostX86 + endif + + PATH := $(shell IFS=$$'\n'; cygpath "$(VCCompilerToolsBinDir)/$(TargetArchMoniker)"):$(PATH) + PATH := $(PATH):$(shell IFS=$$'\n'; cygpath "$(VsInstallRoot)/Common7/IDE") + INCLUDE := $(shell IFS=$$'\n'; cygpath -w "$(VcCompilerToolsDir)/include") + LIB := $(shell IFS=$$'\n'; cygpath -w "$(VcCompilerToolsDir)/lib/$(TargetArchMoniker)") + ifneq (,$(findstring uwp,$(PlatformSuffix))) + LIB := $(shell IFS=$$'\n'; cygpath -w "$(LIB)/store") + endif + + export INCLUDE := $(INCLUDE);$(WindowsSDKSharedIncludeDir);$(WindowsSDKUCRTIncludeDir);$(WindowsSDKUMIncludeDir) + export LIB := $(LIB);$(WindowsSDKUCRTLibDir);$(WindowsSDKUMLibDir) + TARGET := $(TARGET_NAME)_libretro.dll + PSS_STYLE :=2 + LDFLAGS += -DLL + +# Windows +else + TARGET := $(TARGET_NAME)_libretro.dll + CC ?= gcc + fpic := -fPIC + SHARED := -shared -static-libgcc -static-libstdc++ + CFLAGS += -D__WIN32__ -D__WIN32_LIBRETRO__ + +endif + +CFLAGS += -D__LIBRETRO__ + +ifeq ($(USE_LIBRETRO_VFS),1) + CFLAGS += -DUSE_LIBRETRO_VFS +endif + +ifeq ($(LOW_MEMORY), 1) + CFLAGS += -DLOW_MEMORY +endif + +ifeq ($(NO_ARM_ASM),1) +use_cyclone = 0 +use_fame = 1 +use_drz80 = 0 +use_cz80 = 1 +use_svpdrc = 0 asm_memory = 0 asm_render = 0 asm_ym2612 = 0 asm_misc = 0 -asm_cdpico = 0 asm_cdmemory = 0 asm_mix = 0 - -ifeq 
($(platform), unix) - TARGET := $(TARGET_NAME)_libretro.so - SHARED := -shared -else ifeq ($(platform), osx) - TARGET := $(TARGET_NAME)_libretro.dylib - SHARED := -dynamiclib -else ifeq ($(platform), ios) - TARGET := $(TARGET_NAME)_libretro_ios.dylib - SHARED := -dynamiclib - - CC = clang -arch armv7 -isysroot $(IOSSDK) - CXX = clang++ -arch armv7 -isysroot $(IOSSDK) - CC_AS = perl ./tools/gas-preprocessor.pl $(CC) - CFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon -marm - ASFLAGS += -mcpu=cortex-a8 -mtune=cortex-a8 -mfpu=neon - CFLAGS += -DIOS - - ARCH := arm - - use_cyclone = 0 - use_fame = 1 - use_drz80 = 0 - use_cz80 = 1 - use_sh2drc = 1 - use_svpdrc = 1 - -else ifeq ($(platform), ps3) - TARGET := $(TARGET_NAME)_libretro_ps3.a - CC = $(CELL_SDK)/host-win32/ppu/bin/ppu-lv2-gcc.exe - AR = $(CELL_SDK)/host-win32/ppu/bin/ppu-lv2-ar.exe - CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -else ifeq ($(platform), sncps3) - TARGET := $(TARGET_NAME)_libretro_ps3.a - CC = $(CELL_SDK)/host-win32/sn/bin/ps3ppusnc.exe - AR = $(CELL_SDK)/host-win32/sn/bin/ps3snarl.exe - CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -else ifeq ($(platform), psl1ght) - TARGET := $(TARGET_NAME)_libretro_psl1ght.a - CC = $(PS3DEV)/ppu/bin/ppu-gcc$(EXE_EXT) - AR = $(PS3DEV)/ppu/bin/ppu-ar$(EXE_EXT) - CFLAGS += -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -else ifeq ($(platform), psp1) - TARGET := $(TARGET_NAME)_libretro_psp1.a - CC = psp-gcc$(EXE_EXT) - AR = psp-ar$(EXE_EXT) - CFLAGS += -DPSP -G0 -else ifeq ($(platform), xenon) - TARGET := $(TARGET_NAME)_libretro_xenon360.a - CC = xenon-gcc$(EXE_EXT) - AR = xenon-ar$(EXE_EXT) - CFLAGS += -D__LIBXENON__ -m32 -D__ppc__ -else ifeq ($(platform), ngc) - TARGET := $(TARGET_NAME)_libretro_ngc.a - CC = $(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) - AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) - CFLAGS += -DGEKKO -DHW_DOL -mrvl -mcpu=750 -meabi -mhard-float -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -else ifeq ($(platform), wii) - TARGET := libretro_$(TARGET_NAME)_wii.a - CC = 
$(DEVKITPPC)/bin/powerpc-eabi-gcc$(EXE_EXT) - AR = $(DEVKITPPC)/bin/powerpc-eabi-ar$(EXE_EXT) - CFLAGS += -DGEKKO -DHW_RVL -mrvl -mcpu=750 -meabi -mhard-float -DBLARGG_BIG_ENDIAN=1 -D__ppc__ -else ifeq ($(platform), qnx) - TARGET := $(TARGET_NAME)_libretro_qnx.so - CC = qcc -Vgcc_ntoarmv7le - CC_AS = $(CC) - CFLAGS += -DBASE_ADDR_FIXED=0 -D__BLACKBERRY_QNX__ -marm -mcpu=cortex-a9 -mtune=cortex-a9 -mfpu=neon -mfloat-abi=softfp - ASFLAGS += -mcpu=cortex-a9 -mfpu=neon -mfloat-abi=softfp - - ARCH = arm - ARM_ASM = 1 -else ifneq (,$(findstring armv,$(platform))) - TARGET := $(TARGET_NAME)_libretro.so - SHARED := -shared -Wl,--no-undefined -ifneq (,$(findstring cortexa8,$(platform))) - CFLAGS += -marm -mcpu=cortex-a8 - ASFLAGS += -mcpu=cortex-a8 -else ifneq (,$(findstring cortexa9,$(platform))) - CFLAGS += -marm -mcpu=cortex-a9 - ASFLAGS += -mcpu=cortex-a9 -endif - CFLAGS += -marm -ifneq (,$(findstring neon,$(platform))) - CFLAGS += -mfpu=neon - ASFLAGS += -mfpu=neon -endif -ifneq (,$(findstring softfloat,$(platform))) - CFLAGS += -mfloat-abi=softfp - ASFLAGS += -mfloat-abi=softfp -else ifneq (,$(findstring hardfloat,$(platform))) - CFLAGS += -mfloat-abi=hard - ASFLAGS += -mfloat-abi=hard -endif -ifneq (,$(findstring armasm,$(platform))) - ARM_ASM = 1 -endif - ARCH = arm -else - TARGET := $(TARGET_NAME)_retro.dll - CC = gcc - LD_FLAGS := -fPIC - SHARED := -shared -static-libgcc -static-libstdc++ - CFLAGS += -D__WIN32__ -D__WIN32_LIBRETRO__ +asm_32xdraw = 0 +asm_32xmemory = 0 endif -ifeq ($(ARM_ASM),1) -asm_memory = 1 -asm_render = 1 -asm_ym2612 = 1 -asm_misc = 1 -asm_cdpico = 1 -asm_cdmemory = 1 -asm_mix = 1 +ifeq ($(APPLE),1) +# turn off DRCs on Apple OSes. It needs signing and notarizing on the +# later versions, which picodrive isn't supporting right now. 
+use_sh2drc = 0 +use_svpdrc = 0 +endif + +CFLAGS += $(fpic) + +ifeq ($(findstring Haiku,$(shell uname -a)),) + LDLIBS += $(LIBM) endif -CFLAGS += -fPIC -LDLIBS += -lm SHARED ?= -shared -LDFLAGS += $(SHARED) +LDFLAGS += $(SHARED) $(fpic) +ifeq ($(ARCH),) +ARCH = $(shell $(CC) -dumpmachine | awk -F '-' '{print $$1}') +endif PLATFORM = libretro NO_CONFIG_MAK = yes +OBJOUT = -o +LINKOUT = -o + +ifneq (,$(findstring msvc,$(platform))) + CFLAGS += -wd4702 -wd4711 -wd4202 -wd4101 +endif + +ifeq ($(DEBUG), 1) + ifneq (,$(findstring msvc,$(platform))) + ifeq ($(STATIC_LINKING),1) + CFLAGS += -MTd + CXXFLAGS += -MTd + else + CFLAGS += -MDd + CXXFLAGS += -MDd + endif + + CFLAGS += -Od -Zi -DDEBUG -D_DEBUG + CXXFLAGS += -Od -Zi -DDEBUG -D_DEBUG + else + CFLAGS += -O0 -g -DDEBUG + CXXFLAGS += -O0 -g -DDEBUG + endif +else + ifneq (,$(findstring msvc,$(platform))) + ifeq ($(STATIC_LINKING),1) + CFLAGS += -MT + CXXFLAGS += -MT + else + CFLAGS += -MD + CXXFLAGS += -MD + endif + + CFLAGS += -O2 -DNDEBUG + CXXFLAGS += -O2 -DNDEBUG + else + CFLAGS += -O3 -DNDEBUG + CXXFLAGS += -O3 -DNDEBUG + endif +endif + +ifneq (,$(findstring msvc,$(platform))) + OBJOUT = -Fo + LINKOUT = -out: +ifeq ($(STATIC_LINKING),1) + LD ?= lib.exe + STATIC_LINKING=0 +else + LD = link.exe +endif +else + LD = $(CC) +endif + +PLATFORM_ZLIB ?= 1 + include Makefile + +ifeq ($(platform), osx) +pico/cd/libchdr/src/libchdr_chd.o: CFLAGS += -D__MACTYPES__=1 +endif diff --git a/README.md b/README.md new file mode 100644 index 00000000..d418999d --- /dev/null +++ b/README.md @@ -0,0 +1,55 @@ +# Sonic's UGC for PSP + +A fork of PicoDrive for PSP, skinned to look like Sonic's Ultimate Genesis Collection + +### Why? 
+ +In 2019, when I originally had this idea, I had just gotten a PSP, and from 2016 to 2020 I had a copy of Sonic's UGC on the Xbox 360. I knew that the PSP was more than powerful enough to run the same library of games, and the idea stayed in the back of my mind until August 2024, when I finally gained enough skill to make it a reality. + +**Fun Fact** - I did make a **terrible** version of this idea in 2019 with my limited brain, which I still have archived today (I stretched the SUGC logo to fit on the PBP, changed the background, which made the game list almost unreadable, and added a few other horrible hacks), but it didn't involve any source code. This time I'm doing it for real, redesigning the UI and open-sourcing all of it. + +### Downloads + +Soon! + +### To-do List + +| Done | Feature | Progress | +| ------- | --------------------- | -------- | +| ✅ (M1) | ISO Support | 100% | +| ✅ (M1) | Game List | 100% | +| ✅ (M2) | Title Screen | 100% | +| ✅ (M2) | Backgrounds | 100% | +| ✅ (M2) | SRAM in SAVEDATA | 100% | +| ✳️ (M3) | Bunken Tech Fontmap | 100% | +| ✳️ (M3) | Background Theme | 35% | +| ❌ (M4) | ROM Selector UI | 0% | +| ❌ (M5) | ROM Boxart UI | 0% | +| ❌ (M6) | Rating System | 0% | +| ❓ (V1) | Customizer Tool | 0% | + +### Q&A + +**Q: Will you make this for the European version of Sonic's UGC (Sega Mega Drive Ultimate Collection // SEGA MDUC)?** +A: Originally I wasn't going to, but after making a separate branch to do it, I'm definitely going to release it. Up-to-date development builds will ALWAYS be based on SUGC, though. + +**Q: Will you make a PS2 Port? It looks really similar to the PSP version!** +A: Never. I haven't owned a PS2 since 2013 (when I was a fucking baby) and don't plan to. + +**Q: How can I add more games?** +A: Download or compile the Memory Stick version, go to (or create) the "rom" directory, and add your games there. The file format doesn't matter. + +**Q: How do I compile on Windows?** +A: Not supported.
I haven't used Windows as my daily OS since 2021 and have no plans to go back. Modern Windows is terrible and I have no reason to switch back (the biggest hurdle was Camtasia, which I finally stopped using after 6 years). Just use a Linux VM if you can't make the switch. + +**Q: Does this work on PS Vita?** +A: Yes, just use Adrenaline and you're good. Both the ISO and Memory Stick versions should work with no problem. + +**Q: Does this work on PPSSPP?** +A: Absolutely! I used PPSSPP for testing the dev builds and it works flawlessly, but if you're gonna emulate Sega Genesis games on an emulator, just use a normal Sega Genesis emulator. This is largely intended for people who own a PSP or PS Vita console. + +**Q: How did you get an uncompressed version of the SUGC Intro and Soundtrack?** +A: I ripped them myself from the Xbox 360 and PS3 versions. Just extract the files from the ISO and you should be good. ([Intro Video (X360)](https://dl.raythefox.pw/Projects/Sonic%27s%20UGC%20for%20PSP/Assets/SGC2_ATTRACT.wmv) - [Background Theme (Retro Dreams) (X360)](https://dl.raythefox.pw/Projects/Sonic%27s%20UGC%20for%20PSP/Assets/retro_dreams.xma) - [Background Video (X360)](https://dl.raythefox.pw/Projects/Sonic%27s%20UGC%20for%20PSP/Assets/MAIN0001.wmv) - [SUGC Logo (PS3)](https://dl.raythefox.pw/Projects/Sonic%27s%20UGC%20for%20PSP/Assets/SUGC_LOGO.PNG) - [SMDUC Logo (PS3)](https://dl.raythefox.pw/Projects/Sonic%27s%20UGC%20for%20PSP/Assets/SMDUC_LOGO.PNG) - [XMB BGM (PS3)](https://dl.raythefox.pw/Projects/Sonic%27s%20UGC%20for%20PSP/Assets/SND0.AT3) - [XMB BGM (PS3, converted to WAV)](https://dl.raythefox.pw/Projects/Sonic%27s%20UGC%20for%20PSP/Assets/SND0.wav) - [XMB BGM (PS3, converted to MP3)](https://dl.raythefox.pw/Projects/Sonic%27s%20UGC%20for%20PSP/Assets/SND0.mp3)) + +**Q: How did you figure out this really confusing source code?** +A: I admit that the PicoDrive source code is *almost* unreadable, and I definitely wouldn't recommend it to a beginner, but even if
you are total trash at C code like i am, if you know where to look, it's not that hard, 99% of the changes were in the UI and the PSP-specific code, not on the emulator itself, i'm pretty sure it's possible to port this to Windows/Linux/PS2, but i have no desire in doing it myself, i can provide all of the design files, [my DMs are always open](https://raythefox.pw), message me and i'll be glad to help. diff --git a/configure b/configure index 097a2764..deb9b75c 100755 --- a/configure +++ b/configure @@ -10,53 +10,63 @@ rm -f config.log compile_object() { - c="$CC $CFLAGS -c $TMPC -o $TMPO $@" + c="$CC $MFLAGS $CFLAGS -c $TMPC -o $TMPO $@" echo $c >> config.log $c >> config.log 2>&1 } compile_binary() { - c="$CC $CFLAGS $TMPC -o $TMPB $LDFLAGS $@" + c="$CC $MFLAGS $CFLAGS $TMPC -o $TMPB $LDFLAGS $@ $SYSLIBS" echo $c >> config.log $c >> config.log 2>&1 } +check_option() +{ + echo 'void test(void) { }' >$TMPC + compile_object $1 || return 1 + return 0 +} + check_define() { - $CC -E -dD $CFLAGS pico/arm_features.h | grep -q $1 || return 1 + $CC -E -dD $MFLAGS $CFLAGS pico/arm_features.h | grep -q "define[ ]*$1" || return 1 return 0 } # setting options to "yes" or "no" will make that choice default, # "" means "autodetect". -platform_list="generic pandora gp2x" +# TODO this is annoyingly messy. 
should have platform and device +platform_list="generic pandora gph dingux retrofw opendingux[-gcw0] odbeta[-gcw0] miyoo rpi1 rpi2 ps2 psp" platform="generic" sound_driver_list="oss alsa sdl" sound_drivers="" have_armv5="" have_armv6="" have_armv7="" +have_arm_oabi="" have_arm_neon="" have_libavcodec="" +have_libchdr="" +have_gles="no" need_sdl="no" -need_xlib="no" -# these are for known platforms -optimize_cortexa8="no" -optimize_arm926ej="no" -optimize_arm920="no" +need_zlib="no" # hardcoded stuff CC="${CC-${CROSS_COMPILE}gcc}" CXX="${CXX-${CROSS_COMPILE}g++}" AS="${AS-${CROSS_COMPILE}as}" -MAIN_LDLIBS="$LDLIBS -lm" +STRIP="${STRIP-${CROSS_COMPILE}strip}" +LD="${LD-${CROSS_COMPILE}gcc}" # Use better gcc for linking +SYSROOT=`$CC $CFLAGS $LDFLAGS --print-sysroot 2> /dev/null || true` config_mak="config.mak" fail() { echo "$@" + if test -n "$DUMP_CONFIG_LOG"; then cat config.log; fi exit 1 } @@ -64,23 +74,91 @@ fail() set_platform() { platform=$1 + CFLAGS="$CFLAGS -D__`echo ${platform%-*} | tr '[a-z]' '[A-Z]'`__" case "$platform" in + rpi1) + MFLAGS="-mcpu=arm1176jzf-s -mfpu=vfp" + have_gles="yes" + ;; + rpi2) + MFLAGS="-mcpu=cortex-a7 -mfpu=neon" + have_gles="yes" + ;; generic) + MFLAGS="" + ;; + dingux) + # dingoo a320, ritmix rzx-50, the like. all have Ingenic MIPS cpu <= JZ4755 + sound_drivers="sdl" + # use static linking since the lib situation is ... let's say vague + #LDFLAGS="$LDFLAGS -static" + # uses a pre-gcw0 version of opendingux + MFLAGS="-march=mips32 -msoft-float" + platform="opendingux" + ;; + retrofw) + # devices using retrofw. 
AFAIK all have Ingenic MIPS JZ4760 with fpu + sound_drivers="sdl" + # uses it's own modified version of opendingux + MFLAGS="-march=mips32" + platform="opendingux" + ;; + opendingux | opendingux-gcw0) + # more modern devices using opendingux, with Ingenic MIPS JZ4770 or newer + sound_drivers="sdl" + # mostly based on opendingux for gcw0 + CFLAGS="$CFLAGS -D__OPENDINGUX__" + [ "${platform#*gcw0}" = "" ] && CFLAGS="$CFLAGS -D__GCW0__" + MFLAGS="-march=mips32r2" + platform="opendingux" + ;; + miyoo) + # Miyoo BittBoy, PocketGO 1, PowKiddy V90/Q90 with Allwinner F1C100s + sound_drivers="sdl" + CFLAGS="$CFLAGS -D__OPENDINGUX__" + MFLAGS="-mcpu=arm926ej-s -marm" + platform="opendingux" + ;; + odbeta | odbeta-gcw0) + # various devices with opendingux beta, arch flags from toolchain default + sound_drivers="sdl" + CFLAGS="$CFLAGS -D__OPENDINGUX__" + [ "${platform#*gcw0}" = "" ] && CFLAGS="$CFLAGS -D__GCW0__" + MFLAGS="" # toolchains are arch specific + platform="opendingux" ;; pandora) sound_drivers="oss alsa" - optimize_cortexa8="yes" - have_arm_neon="yes" + have_libavcodec="yes" + MFLAGS="-mcpu=cortex-a8 -mfpu=neon -mfloat-abi=softfp" ;; - gp2x) + gph) sound_drivers="oss" - optimize_arm920="yes" + # compile for OABI if toolchain provides it (faster code on caanoo) + have_arm_oabi="yes" + # always use static linking, since caanoo doesn't have OABI libs. Moreover, + # dynamic linking slows Wiz 1-10%, and libm on F100 isn't compatible + LDFLAGS="$LDFLAGS -static" + # unified binary for all of them. picodrive detects device type for itself. 
CFLAGS="$CFLAGS -D__GP2X__" - if [ "$CROSS_COMPILE" = "arm-linux-" ]; then - # still using static, dynamic linking slows Wiz 1-10% - # also libm on F100 is not compatible - MAIN_LDLIBS="$MAIN_LDLIBS -static" - fi + # add -mfpu=fpa to select correct parameter passing for -msoft-float :-/ + MFLAGS="-mcpu=arm920t -mfloat-abi=soft -mfpu=fpa" + platform="gp2x" + ;; + psp) + # use newlib + SYSLIBS="-lc -lpspuser -lpspkernel" + MFLAGS="-march=allegrex" + ARCH=mipsel + ;; + ps2) + # use newlib + SYSLIBS="" + MFLAGS="" + ARCH=mips64r5900el + ASFLAGS="$ASFLAGS -G0 -call_shared" + CFLAGS="$CFLAGS -D_EE -G0 -I${PS2SDK}/ee/include -I${PS2SDK}/common/include -I${PS2DEV}/gsKit/include -I${PS2SDK}/ports/include" + LDFLAGS="$LDFLAGS -Wl,-zmax-page-size=128 -T${PS2SDK}/ee/startup/linkfile -L${PS2SDK}/ee/lib -L${PS2DEV}/gsKit/lib -L${PS2SDK}/ports/lib" ;; *) fail "unsupported platform: $platform" @@ -97,6 +175,12 @@ for opt do ;; --sound-drivers=*) sound_drivers="$optarg" ;; + --with-libavcodec=*) have_libavcodec="$optarg" + ;; + --with-sdl-gles=*) have_gles="$optarg" + ;; + --with-zlib=*) need_zlib="$optarg" + ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac @@ -109,8 +193,11 @@ if [ "$show_help" = "yes" ]; then echo " available: $platform_list" echo " --sound-drivers=LIST sound output drivers [guessed]" echo " available: $sound_driver_list" + echo " --with-libavcodec=yes|no use libavcodec for mp3 decoding" + echo " --with-sdl-gles=yes|no enable GLES usage for SDL" + echo " --with-zlib=yes|no use internal zlib" echo "influential environment variables:" - echo " CROSS_COMPILE CC CXX AS CFLAGS ASFLAGS LDFLAGS LDLIBS" + echo " CROSS_COMPILE CC CXX AS STRIP CFLAGS ASFLAGS LDFLAGS LDLIBS" exit 1 fi @@ -133,34 +220,22 @@ fi # fi #fi -# basic compiler test -cat > $TMPC < $TMPC < $TMPC < - int main(void) { uncompress(0, 0, 0, 0); } + int main (int argc, char *argv[]) { uncompress(0, 0, 0, 0); } EOF - compile_binary + compile_binary "$@" } check_libpng() { cat > $TMPC < - 
void main() { png_init_io(0, 0); } + int main (int argc, char *argv[]) { png_init_io(0, 0); } EOF # compile_binary compile_object @@ -254,7 +324,7 @@ check_oss() cat > $TMPC < #include - void main() { int a=0; ioctl(0, SNDCTL_DSP_SETFMT, &a); } + int main (int argc, char *argv[]) { int a=0; ioctl(0, SNDCTL_DSP_SETFMT, &a); } EOF compile_binary } @@ -263,7 +333,7 @@ check_alsa() { cat > $TMPC < - void main() { snd_pcm_open(0, 0, 0, 0); } + int main (int argc, char *argv[]) { snd_pcm_open(0, 0, 0, 0); } EOF compile_binary "$@" } @@ -272,7 +342,7 @@ check_sdl() { cat > $TMPC < - void main() { SDL_OpenAudio(0, 0); } + int main (int argc, char *argv[]) { SDL_OpenAudio(0, 0); } EOF compile_binary "$@" } @@ -281,25 +351,49 @@ check_libavcodec() { cat > $TMPC < - void main() { avcodec_decode_audio3(0, 0, 0, 0); } + int main (int argc, char *argv[]) { avcodec_decode_audio3(0, 0, 0, 0); } EOF compile_object "$@" } -#MAIN_LDLIBS="$MAIN_LDLIBS -lz" -#check_zlib || fail "please install zlib (libz-dev)" +check_libchdr() +{ + cat > $TMPC < + int main (int argc, char *argv[]) { chd_open("", 0, NULL, NULL); } +EOF + compile_object "$@" +} + +test -n "$SDL_CONFIG" || SDL_CONFIG="$(ls $SYSROOT/*bin*/sdl-config 2>/dev/null | grep /bin/sdl-config | head -n 1)" +test -n "$SDL_CONFIG" || SDL_CONFIG="$(ls $SYSROOT/*/*bin*/sdl-config 2>/dev/null | grep /bin/sdl-config | head -n 1)" +#test -n "$SDL_CONFIG" || SDL_CONFIG="$(ls $SYSROOT/*bin*/sdl2-config 2>/dev/null | grep /bin/sdl2-config | head -n 1)" +#test -n "$SDL_CONFIG" || SDL_CONFIG="$(ls $SYSROOT/*/*bin*/sdl2-config 2>/dev/null | grep /bin/sdl2-config | head -n 1)" +SDLVERSION=sdl && echo $SDL_CONFIG | grep -q sdl2 && SDLVERSION=sdl2 +test -n "$SDL_CONFIG" || SDL_CONFIG=false + +MAIN_LDLIBS="$LDLIBS -lm" + +check_zlib -lz && MAIN_LDLIBS="$MAIN_LDLIBS -lz" || need_zlib="yes" MAIN_LDLIBS="-lpng $MAIN_LDLIBS" check_libpng || fail "please install libpng (libpng-dev)" -if check_libavcodec; then - have_libavcodec="yes" - # add -ldl if 
needed - case "$MAIN_LDLIBS" in - *"-ldl"*) ;; - *) MAIN_LDLIBS="-ldl $MAIN_LDLIBS" ;; - esac -fi +case "$have_libavcodec" in +y|Y|yes) + if check_libavcodec; then + have_libavcodec="yes" + need_dl=yes + else + have_libavcodec="no" + fi ;; +*) have_libavcodec="no" ;; +esac + +#if check_libchdr; then +# have_libchdr="yes" +# MAIN_LDLIBS="-lchdr $MAIN_LDLIBS" +#fi # find what audio support we can compile if [ "x$sound_drivers" = "x" ]; then @@ -308,7 +402,7 @@ if [ "x$sound_drivers" = "x" ]; then sound_drivers="$sound_drivers alsa" MAIN_LDLIBS="-lasound $MAIN_LDLIBS" fi - if [ "$need_sdl" = "yes" ] || check_sdl `sdl-config --cflags --libs`; then + if [ "$need_sdl" = "yes" ] || check_sdl `$SDL_CONFIG --cflags --libs`; then sound_drivers="$sound_drivers sdl" need_sdl="yes" fi @@ -323,17 +417,27 @@ else fi if [ "$need_sdl" = "yes" ]; then - which sdl-config > /dev/null || \ + [ -x "$SDL_CONFIG" ] || \ fail "sdl-config is missing; please install libsdl (libsdl1.2-dev)" - CFLAGS="$CFLAGS `sdl-config --cflags`" - MAIN_LDLIBS="`sdl-config --libs` $MAIN_LDLIBS" - check_sdl `sdl-config --libs` || fail "please install libsdl (libsdl1.2-dev)" + CFLAGS="$CFLAGS `$SDL_CONFIG --cflags`" + MAIN_LDLIBS="`$SDL_CONFIG --libs` $MAIN_LDLIBS" + SYSLIBS="$SYSLIBS -ldl" + need_dl=yes + check_sdl `$SDL_CONFIG --libs` || fail "please install libsdl (libsdl1.2-dev)" + if [ "$SDLVERSION" = "sdl2" ]; then + CFLAGS="$CFLAGS -D__USE_SDL2__" + fi fi -cat > $TMPC < $config_mak @@ -360,8 +466,10 @@ echo >> $config_mak echo "CC = $CC" >> $config_mak echo "CXX = $CXX" >> $config_mak echo "AS = $AS" >> $config_mak -echo "CFLAGS += $CFLAGS" >> $config_mak -echo "ASFLAGS += $ASFLAGS" >> $config_mak +echo "STRIP = $STRIP" >> $config_mak +echo "LD = $LD" >> $config_mak +echo "CFLAGS += $MFLAGS $CFLAGS" >> $config_mak +echo "ASFLAGS += $MFLAGS $ASFLAGS" >> $config_mak echo "LDFLAGS += $LDFLAGS" >> $config_mak echo "LDLIBS += $MAIN_LDLIBS" >> $config_mak echo >> $config_mak @@ -369,18 +477,31 @@ echo >> 
$config_mak echo "ARCH = $ARCH" >> $config_mak echo "PLATFORM = $platform" >> $config_mak echo "SOUND_DRIVERS = $sound_drivers" >> $config_mak +if [ "$have_gles" = "yes" ]; then + echo "HAVE_GLES = 1" >> $config_mak +fi if [ "$have_libavcodec" = "yes" ]; then echo "HAVE_LIBAVCODEC = 1" >> $config_mak fi +if [ "$have_libchdr" = "yes" ]; then + echo "HAVE_LIBCHDR = 1" >> $config_mak +fi +if [ "$need_zlib" = "yes" ]; then + echo "PLATFORM_ZLIB = 1" >> $config_mak +fi +if [ "$ARCH" = "arm" -a "$have_armv6" != "yes" -a "$have_armv7" != "yes" ]; then + # pass info to cyclone not to use newer arm arch instructions + echo "HAVE_ARMv6 = 0" >> $config_mak +fi # GP2X toolchains are too old for UAL asm, # so add this here to not litter main Makefile -if [ "$platform" = "g1p2x" ]; then - echo >> $config_mak - echo "%.o: %.S" >> $config_mak - echo " $(CC) $(CFLAGS) -E -c $^ -o /tmp/$(notdir $@).s" >> $config_mak - echo " $(AS) $(ASFLAGS) /tmp/$(notdir $@).s -o $@" >> $config_mak -fi +#if [ "$platform" = "gp2x" ]; then +# echo >> $config_mak +# echo '%.o: %.S' >> $config_mak +# echo ' $(CC) $(CFLAGS) -E -c $^ -o /tmp/$(notdir $@).s' >> $config_mak +# echo ' $(AS) $(ASFLAGS) /tmp/$(notdir $@).s -o $@' >> $config_mak +#fi # use pandora's skin (for now) test -e skin || ln -s platform/pandora/skin skin diff --git a/cpu/DrZ80/drz80.S b/cpu/DrZ80/drz80.S new file mode 100644 index 00000000..5a075299 --- /dev/null +++ b/cpu/DrZ80/drz80.S @@ -0,0 +1,8345 @@ +;@ Reesy's Z80 Emulator Version 0.001 + +;@ (c) Copyright 2004 Reesy, All rights reserved +;@ (c) Copyright 2021-24 irixxxx, All rights reserved +;@ DrZ80 is free for non-commercial use. + +;@ For commercial use, separate licencing terms must be obtained. + +;@ version 0.002: +;@ modified for support of undefined F register bits by irixxxx, passes ZEXALL with a hack for +;@ BIT n,(HL), which is the only instruction needing support for the internal WZ register. 
+ +#include + + .data + .align 4 + + .global DrZ80Run + .global DrZ80Ver + .global DrZ80_ARM ;@ need these externally to convert flags in Z80F which + .global DrARM_Z80 ;@ is in internal representation to its Z80 form + + .equiv INTERRUPT_MODE, 0 ;@0 = Use internal int handler, 1 = Use Mames int handler + .equiv FAST_Z80SP, 0 ;@0 = Use mem functions for stack pointer, 1 = Use direct mem pointer + .equiv UPDATE_CONTEXT, 0 + .equiv DRZ80_XMAP, 1 + .equiv DRZ80_XMAP_MORE_INLINE, 1 + +.if DRZ80_XMAP + .equ Z80_MEM_SHIFT, 10 +.endif + +.if INTERRUPT_MODE + .extern Interrupt +.endif + +DrZ80Ver: .long 0x0002 + +;@ --------------------------- Defines ---------------------------- +;@ Make sure that regs/pointers for z80pc to z80sp match up! + + eabi_align .req r2 @ pushed for EABI alignment all over the place + z80_icount .req r3 + opcodes .req r4 + cpucontext .req r5 + z80pc .req r6 + z80a .req r7 + z80f .req r8 + z80bc .req r9 + z80de .req r10 + z80hl .req r11 + z80sp .req r12 + z80xx .req lr + + .equ z80pc_pointer, 0 ;@ 0 + .equ z80a_pointer, z80pc_pointer+4 ;@ 4 + .equ z80f_pointer, z80a_pointer+4 ;@ 8 + .equ z80bc_pointer, z80f_pointer+4 ;@ + .equ z80de_pointer, z80bc_pointer+4 + .equ z80hl_pointer, z80de_pointer+4 + .equ z80sp_pointer, z80hl_pointer+4 + .equ z80pc_base, z80sp_pointer+4 + .equ z80sp_base, z80pc_base+4 + .equ z80ix, z80sp_base+4 + .equ z80iy, z80ix+4 + .equ z80i, z80iy+4 + .equ z80a2, z80i+4 + .equ z80f2, z80a2+4 + .equ z80bc2, z80f2+4 + .equ z80de2, z80bc2+4 + .equ z80hl2, z80de2+4 + .equ cycles_pointer, z80hl2+4 + .equ previouspc, cycles_pointer+4 + .equ z80irq, previouspc+4 + .equ z80if, z80irq+1 + .equ z80im, z80if+1 + .equ z80r, z80im+1 + .equ z80irqvector, z80r+1 + .equ z80irqcallback, z80irqvector+4 + .equ z80_write8, z80irqcallback+4 + .equ z80_write16, z80_write8+4 + .equ z80_in, z80_write16+4 + .equ z80_out, z80_in+4 + .equ z80_read8, z80_out+4 + .equ z80_read16, z80_read8+4 + .equ z80_rebaseSP, z80_read16+4 + .equ z80_rebasePC, 
z80_rebaseSP+4 + + .equ VFlag, 0 + .equ CFlag, 1 + .equ ZFlag, 2 + .equ SFlag, 3 + .equ HFlag, 4 + .equ Flag3, 5 ;@ NB (Flag5-Flag3) must be (Z80_Flag5-Z80_Flag3) + .equ NFlag, 6 + .equ Flag5, 7 + + .equ Z80_CFlag, 0 + .equ Z80_NFlag, 1 + .equ Z80_VFlag, 2 + .equ Z80_Flag3, 3 + .equ Z80_HFlag, 4 + .equ Z80_Flag5, 5 + .equ Z80_ZFlag, 6 + .equ Z80_SFlag, 7 + + .equ Z80_IF1, 1<<0 + .equ Z80_IF2, 1<<1 + .equ Z80_HALT, 1<<2 + .equ Z80_NMI, 1<<3 + +;@--------------------------------------- + + .equ VBit, 1< r1 +.if FAST_Z80SP +.if DRZ80_XMAP + stack_check +.endif + mov r1,\reg, lsr #8 + strb r1,[z80sp,#-1]! + strb \reg,[z80sp,#-1]! +.else + mov r0,\reg + subs z80sp,z80sp,#2 + addcc z80sp,z80sp,#1<<16 + mov r1,z80sp + writemem16 +.endif +.endm + +.macro opPUSHreg reg +.if FAST_Z80SP +.if DRZ80_XMAP + stack_check +.endif + mov r1,\reg, lsr #24 + strb r1,[z80sp,#-1]! + mov r1,\reg, lsr #16 + strb r1,[z80sp,#-1]! +.else + mov r0,\reg,lsr #16 + subs z80sp,z80sp,#2 + addcc z80sp,z80sp,#1<<16 + mov r1,z80sp + writemem16 +.endif +.endm +;@--------------------------------------- + +.macro opRESmemHL bit + mov r0,z80hl, lsr #16 +.if DRZ80_XMAP + bl z80_xmap_read8 + bic r0,r0,#1<<\bit + mov r1,z80hl, lsr #16 + bl z80_xmap_write8 +.else + stmfd sp!,{r3,r12} + mov lr,pc + ldr pc,[cpucontext,#z80_read8] ;@ r0 = addr - data returned in r0 + bic r0,r0,#1<<\bit + mov r1,z80hl, lsr #16 + mov lr,pc + ldr pc,[cpucontext,#z80_write8] ;@ r0=data r1=addr + ldmfd sp!,{r3,r12} +.endif + fetch 15 +.endm +;@--------------------------------------- + +.macro opRESmem bit +.if DRZ80_XMAP + stmfd sp!,{r0,r2} ;@ save addr as well + bl z80_xmap_read8 + bic r0,r0,#1<<\bit + ldmfd sp!,{r1,r2} ;@ restore addr into r1 + bl z80_xmap_write8 +.else + stmfd sp!,{r3,r12} + stmfd sp!,{r0,r2} ;@ save addr as well + mov lr,pc + ldr pc,[cpucontext,#z80_read8] ;@ r0=addr - data returned in r0 + bic r0,r0,#1<<\bit + ldmfd sp!,{r1,r2} ;@ restore addr into r1 + mov lr,pc + ldr pc,[cpucontext,#z80_write8] ;@ r0=data 
r1=addr + ldmfd sp!,{r3,r12} +.endif + fetch 23 +.endm +;@--------------------------------------- + +.macro opRL reg1 reg2 shift + movs \reg1,\reg2,lsl \shift + tst z80f,#1< 0xFF + writemem8HL + add z80hl,z80hl,#1<<16 + sub z80bc,z80bc,#1<<24 + tst z80bc,#0xFF<<24 + orrmi z80f,z80f,#1< 0xFF + sub z80bc,z80bc,#1<<24 + tst z80bc,#0xFF<<24 + orrmi z80f,z80f,#1< 0xFF + writemem8HL + sub z80hl,z80hl,#1<<16 + sub z80bc,z80bc,#1<<24 + tst z80bc,#0xFF<<24 + orrmi z80f,z80f,#1< 0xFF + sub z80bc,z80bc,#1<<24 + tst z80bc,#0xFF<<24 + orrmi z80f,z80f,#1< 0xFF + writemem8HL + add z80hl,z80hl,#1<<16 + sub z80bc,z80bc,#1<<24 + tst z80bc,#0xFF<<24 + orrmi z80f,z80f,#1< 0xFF + sub z80bc,z80bc,#1<<24 + tst z80bc,#0xFF<<24 + orrmi z80f,z80f,#1< 0xFF + writemem8HL + sub z80hl,z80hl,#1<<16 + sub z80bc,z80bc,#1<<24 + tst z80bc,#0xFF<<24 + orrmi z80f,z80f,#1< 0xFF + sub z80bc,z80bc,#1<<24 + tst z80bc,#0xFF<<24 + orrmi z80f,z80f,#1< r1 -.if FAST_Z80SP -.if DRZ80_XMAP - stack_check -.endif - mov r1,\reg, lsr #8 - strb r1,[z80sp,#-1]! - strb \reg,[z80sp,#-1]! -.else - mov r0,\reg - sub z80sp,z80sp,#2 - mov r1,z80sp - writemem16 -.endif -.endm - -.macro opPUSHreg reg -.if FAST_Z80SP -.if DRZ80_XMAP - stack_check -.endif - mov r1,\reg, lsr #24 - strb r1,[z80sp,#-1]! - mov r1,\reg, lsr #16 - strb r1,[z80sp,#-1]! 
-.else - mov r0,\reg,lsr #16 - sub z80sp,z80sp,#2 - mov r1,z80sp - writemem16 -.endif -.endm -;@--------------------------------------- - -.macro opRESmemHL bit - mov r0,z80hl, lsr #16 -.if DRZ80_XMAP - bl z80_xmap_read8 - bic r0,r0,#1<<\bit - mov r1,z80hl, lsr #16 - bl z80_xmap_write8 -.else - stmfd sp!,{r3,r12} - mov lr,pc - ldr pc,[cpucontext,#z80_read8] ;@ r0 = addr - data returned in r0 - bic r0,r0,#1<<\bit - mov r1,z80hl, lsr #16 - mov lr,pc - ldr pc,[cpucontext,#z80_write8] ;@ r0=data r1=addr - ldmfd sp!,{r3,r12} -.endif - fetch 15 -.endm -;@--------------------------------------- - -.macro opRESmem bit -.if DRZ80_XMAP - stmfd sp!,{r0} ;@ save addr as well - bl z80_xmap_read8 - bic r0,r0,#1<<\bit - ldmfd sp!,{r1} ;@ restore addr into r1 - bl z80_xmap_write8 -.else - stmfd sp!,{r3,r12} - stmfd sp!,{r0} ;@ save addr as well - mov lr,pc - ldr pc,[cpucontext,#z80_read8] ;@ r0=addr - data returned in r0 - bic r0,r0,#1<<\bit - ldmfd sp!,{r1} ;@ restore addr into r1 - mov lr,pc - ldr pc,[cpucontext,#z80_write8] ;@ r0=data r1=addr - ldmfd sp!,{r3,r12} -.endif - fetch 23 -.endm -;@--------------------------------------- - -.macro opRL reg1 reg2 shift - movs \reg1,\reg2,lsl \shift - tst z80f,#1< 0xFF - writemem8HL - add z80hl,z80hl,#1<<16 - sub z80bc,z80bc,#1<<24 - tst z80bc,#0xFF<<24 - orrmi z80f,z80f,#1< 0xFF - sub z80bc,z80bc,#1<<24 - tst z80bc,#0xFF<<24 - orrmi z80f,z80f,#1< 0xFF - writemem8HL - sub z80hl,z80hl,#1<<16 - sub z80bc,z80bc,#1<<24 - tst z80bc,#0xFF<<24 - orrmi z80f,z80f,#1< 0xFF - sub z80bc,z80bc,#1<<24 - tst z80bc,#0xFF<<24 - orrmi z80f,z80f,#1< 0xFF - writemem8HL - add z80hl,z80hl,#1<<16 - sub z80bc,z80bc,#1<<24 - tst z80bc,#0xFF<<24 - orrmi z80f,z80f,#1< 0xFF - sub z80bc,z80bc,#1<<24 - tst z80bc,#0xFF<<24 - orrmi z80f,z80f,#1< 0xFF - writemem8HL - sub z80hl,z80hl,#1<<16 - sub z80bc,z80bc,#1<<24 - tst z80bc,#0xFF<<24 - orrmi z80f,z80f,#1< 0xFF - sub z80bc,z80bc,#1<<24 - tst z80bc,#0xFF<<24 - orrmi z80f,z80f,#1< #include #endif @@ -106,7 +107,7 @@ 
void Cz80_Init(cz80_struc *CPU) for (i = 0; i < CZ80_FETCH_BANK; i++) { - CPU->Fetch[i] = (FPTR)cz80_bad_address; + CPU->Fetch[i] = (FPTR)cz80_bad_address - (i << CZ80_FETCH_SFT); #if CZ80_ENCRYPTED_ROM CPU->OPFetch[i] = 0; #endif @@ -195,10 +196,9 @@ void Cz80_Init(cz80_struc *CPU) CPU->pzR16[0] = pzBC; CPU->pzR16[1] = pzDE; CPU->pzR16[2] = pzHL; - CPU->pzR16[3] = pzAF; + CPU->pzR16[3] = pzFA; zIX = zIY = 0xffff; - zF = ZF; CPU->Interrupt_Callback = Cz80_Interrupt_Callback; } @@ -210,7 +210,8 @@ void Cz80_Init(cz80_struc *CPU) void Cz80_Reset(cz80_struc *CPU) { - memset(CPU, 0, (FPTR)&CPU->BasePC - (FPTR)CPU); + // I, R, CPU and interrupts logic is reset, registers are untouched + memset(&CPU->R, 0, (FPTR)&CPU->BasePC - (FPTR)&CPU->R); Cz80_Set_Reg(CPU, CZ80_PC, 0); } @@ -243,7 +244,6 @@ INT32 Cz80_Exec(cz80_struc *CPU, INT32 cycles) UINT32 adr = 0; UINT32 res; UINT32 val; - int afterEI = 0; union16 *data; PC = CPU->PC; @@ -253,41 +253,46 @@ INT32 Cz80_Exec(cz80_struc *CPU, INT32 cycles) CPU->ICount = cycles - CPU->ExtraCycles; CPU->ExtraCycles = 0; - if (!CPU->HaltState) - { Cz80_Exec: - if (CPU->ICount > 0) + if (CPU->Status) + { + if (CPU->Status & CZ80_HAS_NMI) { -Cz80_Exec_nocheck: - data = pzHL; - Opcode = READ_OP(); -#if CZ80_EMULATE_R_EXACTLY - zR++; -#endif - #include "cz80_op.c" - } - - if (afterEI) + zIFF1 = 0; + CPU->Status &= ~(CZ80_HALTED | CZ80_HAS_NMI); + CPU->ExtraCycles += 11; + PUSH_16(zRealPC); + SET_PC(0x66); + } else if (CPU->Status & CZ80_HAS_INT) { - afterEI = 0; -Cz80_Check_Interrupt: - if (CPU->IRQState != CLEAR_LINE) - { - CHECK_INT - CPU->ICount -= CPU->ExtraCycles; - CPU->ExtraCycles = 0; - } - goto Cz80_Exec; + CHECK_INT + } else if (CPU->Status & CZ80_HALTED) + { + goto Cz80_Exec_End; } + CPU->ICount -= CPU->ExtraCycles; + CPU->ExtraCycles = 0; + } + + if (CPU->ICount > 0) + { +Cz80_Exec_nocheck: + data = pzHL; + Opcode = READ_OP(); +#if CZ80_EMULATE_R_EXACTLY + zR++; +#endif + #include "cz80_op.c" } - else CPU->ICount = 0; 
Cz80_Exec_End: CPU->PC = PC; #if CZ80_ENCRYPTED_ROM CPU->OPBase = OPBase; #endif - cycles -= CPU->ICount; + if (!((CPU->Status & CZ80_HALTED) && CPU->ICount > 0)) + cycles -= CPU->ICount; + CPU->ICount = 0; #if !CZ80_EMULATE_R_EXACTLY zR = (zR + (cycles >> 2)) & 0x7f; #endif @@ -304,29 +309,21 @@ void Cz80_Set_IRQ(cz80_struc *CPU, INT32 line, INT32 state) { if (line == IRQ_LINE_NMI) { - zIFF1 = 0; - CPU->ExtraCycles += 11; - CPU->HaltState = 0; - PUSH_16(CPU->PC - CPU->BasePC) - Cz80_Set_Reg(CPU, CZ80_PC, 0x66); - } - else + if (state) + CPU->Status |= CZ80_HAS_NMI; + else + CPU->Status &= ~CZ80_HAS_NMI; + } else { + CPU->IRQLine = line; CPU->IRQState = state; - - if (state != CLEAR_LINE) + if (state) { - FPTR PC = CPU->PC; -#if CZ80_ENCRYPTED_ROM - FPTR OPBase = CPU->OPBase; -#endif - - CPU->IRQLine = line; - CHECK_INT - CPU->PC = PC; -#if CZ80_ENCRYPTED_ROM - CPU->OPBase = OPBase; -#endif + if (zIFF1) + CPU->Status |= CZ80_HAS_INT; + } else + { + CPU->Status &= ~CZ80_HAS_INT; } } } @@ -342,13 +339,13 @@ UINT32 Cz80_Get_Reg(cz80_struc *CPU, INT32 regnum) { case CZ80_PC: return (CPU->PC - CPU->BasePC); case CZ80_SP: return zSP; - case CZ80_AF: return zAF; + case CZ80_FA: return zFA; case CZ80_BC: return zBC; case CZ80_DE: return zDE; case CZ80_HL: return zHL; case CZ80_IX: return zIX; case CZ80_IY: return zIY; - case CZ80_AF2: return zAF2; + case CZ80_FA2: return zFA2; case CZ80_BC2: return zBC2; case CZ80_DE2: return zDE2; case CZ80_HL2: return zHL2; @@ -357,7 +354,7 @@ UINT32 Cz80_Get_Reg(cz80_struc *CPU, INT32 regnum) case CZ80_IM: return zIM; case CZ80_IFF1: return zIFF1; case CZ80_IFF2: return zIFF2; - case CZ80_HALT: return CPU->HaltState; + case CZ80_HALT: return CPU->Status & CZ80_HALTED; case CZ80_IRQ: return CPU->IRQState; default: return 0; } @@ -381,13 +378,13 @@ void Cz80_Set_Reg(cz80_struc *CPU, INT32 regnum, UINT32 val) break; case CZ80_SP: zSP = val; break; - case CZ80_AF: zAF = val; break; + case CZ80_FA: zFA = val; break; case CZ80_BC: zBC = val; 
break; case CZ80_DE: zDE = val; break; case CZ80_HL: zHL = val; break; case CZ80_IX: zIX = val; break; case CZ80_IY: zIY = val; break; - case CZ80_AF2: zAF2 = val; break; + case CZ80_FA2: zFA2 = val; break; case CZ80_BC2: zBC2 = val; break; case CZ80_DE2: zDE2 = val; break; case CZ80_HL2: zHL2 = val; break; @@ -396,7 +393,7 @@ void Cz80_Set_Reg(cz80_struc *CPU, INT32 regnum, UINT32 val) case CZ80_IM: zIM = val; break; case CZ80_IFF1: zIFF1 = val ? (1 << 2) : 0; break; case CZ80_IFF2: zIFF2 = val ? (1 << 2) : 0; break; - case CZ80_HALT: CPU->HaltState = val; break; + case CZ80_HALT: CPU->Status = !!val * CZ80_HALTED; break; case CZ80_IRQ: CPU->IRQState = val; break; default: break; } diff --git a/cpu/cz80/cz80.h b/cpu/cz80/cz80.h index 9b97d3b4..894ef5c3 100644 --- a/cpu/cz80/cz80.h +++ b/cpu/cz80/cz80.h @@ -11,71 +11,80 @@ #ifndef CZ80_H #define CZ80_H +// uintptr_t +#include +#ifndef _MSC_VER +#include +#endif + #ifdef __cplusplus extern "C" { #endif +#include + /******************************/ /* Compiler dependant defines */ /******************************/ #ifndef UINT8 -#define UINT8 unsigned char +#define UINT8 u8 #endif #ifndef INT8 -#define INT8 signed char +#define INT8 s8 #endif #ifndef UINT16 -#define UINT16 unsigned short +#define UINT16 u16 #endif #ifndef INT16 -#define INT16 signed short +#define INT16 s16 #endif #ifndef UINT32 -#define UINT32 unsigned int +#define UINT32 u32 #endif #ifndef INT32 -#define INT32 signed int +#define INT32 s32 #endif #ifndef FPTR -#define FPTR unsigned long +#define FPTR uptr #endif /*************************************/ /* Z80 core Structures & definitions */ /*************************************/ -#define CZ80_FETCH_BITS 4 // [4-12] default = 8 +// NB this must have at least the value of (16-Z80_MEM_SHIFT) +#define CZ80_FETCH_BITS 6 // [4-12] default = 8 #define CZ80_FETCH_SFT (16 - CZ80_FETCH_BITS) #define CZ80_FETCH_BANK (1 << CZ80_FETCH_BITS) -#define PICODRIVE_HACKS 1 -#define CZ80_LITTLE_ENDIAN 1 +#define 
PICODRIVE_HACKS 1 +#define CZ80_LITTLE_ENDIAN CPU_IS_LE #define CZ80_USE_JUMPTABLE 1 -#define CZ80_BIG_FLAGS_ARRAY 1 +#define CZ80_BIG_FLAGS_ARRAY 1 //#ifdef BUILD_CPS1PSP //#define CZ80_ENCRYPTED_ROM 1 //#else #define CZ80_ENCRYPTED_ROM 0 //#endif -#define CZ80_EMULATE_R_EXACTLY 1 +#define CZ80_EMULATE_R_EXACTLY 1 #define zR8(A) (*CPU->pzR8[A]) #define zR16(A) (CPU->pzR16[A]->W) -#define pzAF &(CPU->AF) -#define zAF CPU->AF.W -#define zlAF CPU->AF.B.L -#define zhAF CPU->AF.B.H -#define zA zhAF -#define zF zlAF +#define pzFA &(CPU->FA) +#define zFA CPU->FA.W +#define zlFA CPU->FA.B.L +#define zhFA CPU->FA.B.H +#define zA zlFA +#define zF zhFA #define pzBC &(CPU->BC) #define zBC CPU->BC.W @@ -98,11 +107,11 @@ extern "C" { #define zH zhHL #define zL zlHL -#define zAF2 CPU->AF2.W -#define zlAF2 CPU->AF2.B.L -#define zhAF2 CPU->AF2.B.H -#define zA2 zhAF2 -#define zF2 zlAF2 +#define zFA2 CPU->FA2.W +#define zlFA2 CPU->FA2.B.L +#define zhFA2 CPU->FA2.B.H +#define zA2 zhFA2 +#define zF2 zlFA2 #define zBC2 CPU->BC2.W #define zDE2 CPU->DE2.W @@ -161,6 +170,10 @@ extern "C" { #define CZ80_IFF_SFT CZ80_PF_SFT #define CZ80_IFF CZ80_PF +#define CZ80_HAS_INT 0x1 +#define CZ80_HAS_NMI 0x2 +#define CZ80_HALTED 0x4 + #ifndef IRQ_LINE_STATE #define IRQ_LINE_STATE #define CLEAR_LINE 0 /* clear (a fired, held or pulsed) line */ @@ -174,13 +187,13 @@ enum { CZ80_PC = 1, CZ80_SP, - CZ80_AF, + CZ80_FA, CZ80_BC, CZ80_DE, CZ80_HL, CZ80_IX, CZ80_IY, - CZ80_AF2, + CZ80_FA2, CZ80_BC2, CZ80_DE2, CZ80_HL2, @@ -219,7 +232,7 @@ typedef struct cz80_t union16 BC; union16 DE; union16 HL; - union16 AF; + union16 FA; }; }; @@ -231,14 +244,14 @@ typedef struct cz80_t union16 BC2; union16 DE2; union16 HL2; - union16 AF2; + union16 FA2; union16 R; union16 IFF; UINT8 I; UINT8 IM; - UINT8 HaltState; + UINT8 Status; UINT8 dummy; INT32 IRQLine; diff --git a/cpu/cz80/cz80_op.c b/cpu/cz80/cz80_op.c index f84f8e75..566782cf 100644 --- a/cpu/cz80/cz80_op.c +++ b/cpu/cz80/cz80_op.c @@ -200,9 +200,15 @@ 
OP_LD_mNN_xx: POP -----------------------------------------*/ + OP(0xf1): // POP AF +OP_POP_AF: + POP_16(res) + zA = res >> 8; + zF = res & 0xFF; + RET(10) + OP(0xc1): // POP BC OP(0xd1): // POP DE - OP(0xf1): // POP AF OP_POP_RR: data = CPU->pzR16[(Opcode >> 4) & 3]; @@ -215,9 +221,14 @@ OP_POP: PUSH -----------------------------------------*/ + OP(0xf5): // PUSH AF +OP_PUSH_AF: + PUSH_16((zA << 8) | zF); + RET(11) + + OP(0xc5): // PUSH BC OP(0xd5): // PUSH DE - OP(0xf5): // PUSH AF OP_PUSH_RR: data = CPU->pzR16[(Opcode >> 4) & 3]; @@ -232,9 +243,9 @@ OP_PUSH: OP(0x08): // EX AF,AF' OP_EX_AF_AF2: - res = zAF; - zAF = zAF2; - zAF2 = res; + res = zFA; + zFA = zFA2; + zFA2 = res; RET(4) OP(0xeb): // EX DE,HL @@ -686,37 +697,28 @@ OP_CCF: OP(0x76): // HALT OP_HALT: - CPU->HaltState = 1; - CPU->ICount = 0; - goto Cz80_Check_Interrupt; + CPU->Status |= CZ80_HALTED; + RET(4) OP(0xf3): // DI OP_DI: zIFF = 0; - RET(4) + USE_CYCLES(4) + goto Cz80_Exec_nocheck; OP(0xfb): // EI OP_EI: USE_CYCLES(4) if (!zIFF1) { - zIFF1 = zIFF2 = (1 << 2); - while (GET_OP() == 0xfb) - { - USE_CYCLES(4) - PC++; -#if CZ80_EMULATE_R_EXACTLY - zR++; -#endif - } if (CPU->IRQState) { - afterEI = 1; - CPU->ExtraCycles += 1 - CPU->ICount; - CPU->ICount = 1; + CPU->Status |= CZ80_HAS_INT; + CPU->ExtraCycles -= CPU->ICount; + CPU->ICount = 0; } } - else zIFF2 = (1 << 2); + zIFF1 = zIFF2 = (1 << 2); goto Cz80_Exec_nocheck; /*----------------------------------------- diff --git a/cpu/cz80/cz80_opCB.c b/cpu/cz80/cz80_opCB.c index 58a78c53..deb493fe 100644 --- a/cpu/cz80/cz80_opCB.c +++ b/cpu/cz80/cz80_opCB.c @@ -283,6 +283,7 @@ switch (Opcode) OPCB(0x7d): // BIT 7,L OPCB(0x7f): // BIT 7,A zF = (zF & CF) | HF | SZ_BIT[zR8(Opcode & 7) & (1 << ((Opcode >> 3) & 7))]; + zF = (zF & ~(XF | YF)) | (zR8(Opcode & 7) & (XF | YF)); RET(8) OPCB(0x46): // BIT 0,(HL) @@ -295,6 +296,7 @@ switch (Opcode) OPCB(0x7e): // BIT 7,(HL) src = READ_MEM8(zHL); zF = (zF & CF) | HF | SZ_BIT[src & (1 << ((Opcode >> 3) & 7))]; + zF = 
(zF & ~(XF | YF)) | (0xc0 & (XF | YF)); // TODO ZEXALL hack, need WZ... RET(12) /*----------------------------------------- diff --git a/cpu/cz80/cz80_opED.c b/cpu/cz80/cz80_opED.c index c3b7ec64..844d91ee 100644 --- a/cpu/cz80/cz80_opED.c +++ b/cpu/cz80/cz80_opED.c @@ -407,30 +407,14 @@ OP_SBC16: RET(8) /*----------------------------------------- - RETN + RETI/RETN -----------------------------------------*/ - OPED(0x45): // RETN; - OPED(0x55): // RETN; - OPED(0x65): // RETN; - OPED(0x75): // RETN; - POP_16(res); - SET_PC(res); - if (!zIFF1 && zIFF2) - { - zIFF1 = (1 << 2); - if (CPU->IRQState) - { - USE_CYCLES(10) - goto Cz80_Check_Interrupt; - } - } - else zIFF1 = zIFF2; - RET(10) - -/*----------------------------------------- - RETI ------------------------------------------*/ + // works the same, but Z80 PIO can detect the opcode + OPED(0x45): // RETN + OPED(0x55): // RETN + OPED(0x65): // RETN + OPED(0x75): // RETN OPED(0x4d): // RETI OPED(0x5d): // RETI @@ -438,6 +422,14 @@ OP_SBC16: OPED(0x7d): // RETI POP_16(res); SET_PC(res); + if (!zIFF1 && zIFF2) + { + if (CPU->IRQState) + { + CPU->Status |= CZ80_HAS_INT; + } + } + zIFF1 = zIFF2; RET(10) /*----------------------------------------- @@ -497,8 +489,8 @@ OP_LDX: val = READ_MEM8(zHL++); WRITE_MEM8(zDE++, val); zBC--; - USE_CYCLES(17) - } while (zBC && (CPU->ICount > 0)); + USE_CYCLES(21) + } while (zBC && (CPU->ICount > -4) && !CPU->Status); goto OP_LDXR; OPED(0xb8): // LDDR @@ -507,8 +499,8 @@ OP_LDX: val = READ_MEM8(zHL--); WRITE_MEM8(zDE--, val); zBC--; - USE_CYCLES(17) - } while (zBC && (CPU->ICount > 0)); + USE_CYCLES(21) + } while (zBC && (CPU->ICount > -4) && !CPU->Status); OP_LDXR: F = zF & (SF | ZF | CF); @@ -521,10 +513,11 @@ OP_LDXR: #if CZ80_EMULATE_R_EXACTLY zR--; #endif - goto Cz80_Exec_End; + ADD_CYCLES(4) + goto Cz80_Exec; } zF = F; - ADD_CYCLES(5) + ADD_CYCLES(4+5) goto Cz80_Exec; /*----------------------------------------- @@ -564,8 +557,8 @@ OP_CPX: if (res & 0x08) F |= XF; if (zBC) F |= 
VF; zF = F; - USE_CYCLES(17) - } while (zBC && !(F & ZF) && (CPU->ICount > 0)); + USE_CYCLES(21) + } while (zBC && !(F & ZF) && (CPU->ICount > -4) && !CPU->Status); goto OP_CPXR; OPED(0xb9): // CPDR @@ -580,8 +573,8 @@ OP_CPX: if (res & 0x08) F |= XF; if (zBC) F |= VF; zF = F; - USE_CYCLES(17) - } while (zBC && !(F & ZF) && (CPU->ICount > 0)); + USE_CYCLES(21) + } while (zBC && !(F & ZF) && (CPU->ICount > -4) && !CPU->Status); OP_CPXR: if (zBC && !(F & ZF)) @@ -590,9 +583,10 @@ OP_CPXR: #if CZ80_EMULATE_R_EXACTLY zR--; #endif - goto Cz80_Exec_End; + ADD_CYCLES(4) + goto Cz80_Exec; } - ADD_CYCLES(5) + ADD_CYCLES(4+5) goto Cz80_Exec; /*----------------------------------------- @@ -614,7 +608,7 @@ OP_INX: F = SZ[zB]; res = ((UINT32)(zC - 1) & 0xff) + (UINT32)val; if (val & SF) F |= NF; - if (res & 0x100) F |= HF | CF; + if (res < val) F |= HF | CF; F |= SZP[(UINT8)(res & 0x07) ^ zB] & PF; zF = F; RET(12) @@ -629,8 +623,8 @@ OP_INX: val = IN(zBC); zB--; WRITE_MEM8(zHL++, val); - USE_CYCLES(17) - } while (zB && (CPU->ICount > 0)); + USE_CYCLES(21) + } while (zB && (CPU->ICount > -4) && !CPU->Status); goto OP_INXR; OPED(0xba): // INDR @@ -639,14 +633,14 @@ OP_INX: val = IN(zBC); zB--; WRITE_MEM8(zHL--, val); - USE_CYCLES(17) - } while (zB && (CPU->ICount > 0)); + USE_CYCLES(21) + } while (zB && (CPU->ICount > -4) && !CPU->Status); OP_INXR: F = SZ[zB]; res = ((UINT32)(zC - 1) & 0xff) + (UINT32)val; if (val & SF) F |= NF; - if (res & 0x100) F |= HF | CF; + if (res < val) F |= HF | CF; F |= SZP[(UINT8)(res & 0x07) ^ zB] & PF; zF = F; if (zB) @@ -655,9 +649,10 @@ OP_INXR: #if CZ80_EMULATE_R_EXACTLY zR--; #endif - goto Cz80_Exec_End; + ADD_CYCLES(4) + goto Cz80_Exec; } - ADD_CYCLES(5); + ADD_CYCLES(4+5); goto Cz80_Exec; /*----------------------------------------- @@ -679,8 +674,8 @@ OP_OUTX: F = SZ[zB]; res = (UINT32)zL + (UINT32)val; if (val & SF) F |= NF; - if (res & 0x100) F |= HF | CF; - F |= SZP[(UINT8)(res & 0x07) - zB] & PF; + if (res < val) F |= HF | CF; + F |= 
SZP[(UINT8)(res & 0x07) ^ zB] & PF; zF = F; RET(12) @@ -694,8 +689,8 @@ OP_OUTX: val = READ_MEM8(zHL++); zB--; OUT(zBC, val); - USE_CYCLES(17) - } while (zB && (CPU->ICount > 0)); + USE_CYCLES(21) + } while (zB && (CPU->ICount > -4) && !CPU->Status); goto OP_OTXR; OPED(0xbb): // OTDR @@ -704,15 +699,15 @@ OP_OUTX: val = READ_MEM8(zHL--); zB--; OUT(zBC, val); - USE_CYCLES(17) - } while (zB && (CPU->ICount > 0)); + USE_CYCLES(21) + } while (zB && (CPU->ICount > -4) && !CPU->Status); OP_OTXR: F = SZ[zB]; res = (UINT32)zL + (UINT32)val; if (val & SF) F |= NF; - if (res & 0x100) F |= HF | CF; - F |= SZP[(UINT8)(res & 0x07) - zB] & PF; + if (res < val) F |= HF | CF; + F |= SZP[(UINT8)(res & 0x07) ^ zB] & PF; zF = F; if (zB) { @@ -720,9 +715,10 @@ OP_OTXR: #if CZ80_EMULATE_R_EXACTLY zR--; #endif - goto Cz80_Exec_End; + ADD_CYCLES(4) + goto Cz80_Exec; } - ADD_CYCLES(5) + ADD_CYCLES(4+5) goto Cz80_Exec; } diff --git a/cpu/cz80/cz80_opXY.c b/cpu/cz80/cz80_opXY.c index b02e4011..fb53c072 100644 --- a/cpu/cz80/cz80_opXY.c +++ b/cpu/cz80/cz80_opXY.c @@ -70,7 +70,7 @@ switch (Opcode) OPXY(0x5c): // LD E,HX OPXY(0x7c): // LD A,HX zR8((Opcode >> 3) & 7) = data->B.H; - RET(5) + RET(4) OPXY(0x45): // LD B,LX OPXY(0x4d): // LD C,LX @@ -78,7 +78,7 @@ switch (Opcode) OPXY(0x5d): // LD E,LX OPXY(0x7d): // LD A,LX zR8((Opcode >> 3) & 7) = data->B.L; - RET(5) + RET(4) OPXY(0x60): // LD HX,B OPXY(0x61): // LD HX,C @@ -86,7 +86,7 @@ switch (Opcode) OPXY(0x63): // LD HX,E OPXY(0x67): // LD HX,A data->B.H = zR8(Opcode & 7); - RET(5) + RET(4) OPXY(0x68): // LD LX,B OPXY(0x69): // LD LX,C @@ -94,15 +94,15 @@ switch (Opcode) OPXY(0x6b): // LD LX,E OPXY(0x6f): // LD LX,A data->B.L = zR8(Opcode & 7); - RET(5) + RET(4) OPXY(0x65): // LD HX,LX data->B.H = data->B.L; - RET(5) + RET(4) OPXY(0x6c): // LD LX,HX data->B.L = data->B.H; - RET(5) + RET(4) OPXY(0x06): // LD B,#imm OPXY(0x0e): // LD C,#imm @@ -113,11 +113,11 @@ switch (Opcode) OPXY(0x26): // LD HX,#imm data->B.H = READ_ARG(); - RET(5) + 
RET(4) OPXY(0x2e): // LD LX,#imm data->B.L = READ_ARG(); - RET(5) + RET(4) OPXY(0x0a): // LD A,(BC) goto OP_LOAD_A_mBC; @@ -194,8 +194,9 @@ switch (Opcode) OPXY(0xc1): // POP BC OPXY(0xd1): // POP DE - OPXY(0xf1): // POP AF goto OP_POP_RR; + OPXY(0xf1): // POP AF + goto OP_POP_AF; OPXY(0xe1): // POP IX goto OP_POP; @@ -206,8 +207,9 @@ switch (Opcode) OPXY(0xc5): // PUSH BC OPXY(0xd5): // PUSH DE - OPXY(0xf5): // PUSH AF goto OP_PUSH_RR; + OPXY(0xf5): // PUSH AF + goto OP_PUSH_AF; OPXY(0xe5): // PUSH IX goto OP_PUSH; @@ -242,12 +244,12 @@ switch (Opcode) OPXY(0x24): // INC HX data->B.H++; zF = (zF & CF) | SZHV_inc[data->B.H]; - RET(5) + RET(4) OPXY(0x2c): // INC LX data->B.L++; zF = (zF & CF) | SZHV_inc[data->B.L]; - RET(5) + RET(4) OPXY(0x34): // INC (IX+o) adr = data->W + (INT8)READ_ARG(); @@ -268,12 +270,12 @@ switch (Opcode) OPXY(0x25): // DEC HX data->B.H--; zF = (zF & CF) | SZHV_dec[data->B.H]; - RET(5) + RET(4) OPXY(0x2d): // DEC LX data->B.L--; zF = (zF & CF) | SZHV_dec[data->B.L]; - RET(5) + RET(4) OPXY(0x35): // DEC (IX+o) adr = data->W + (INT8)READ_ARG(); @@ -296,12 +298,10 @@ switch (Opcode) OPXY(0x84): // ADD A,HX val = data->B.H; - USE_CYCLES(1) goto OP_ADD; OPXY(0x85): // ADD A,LX val = data->B.L; - USE_CYCLES(1) goto OP_ADD; OPXY(0x86): // ADD A,(IX+o) @@ -326,12 +326,10 @@ switch (Opcode) OPXY(0x8c): // ADC A,HX val = data->B.H; - USE_CYCLES(1) goto OP_ADC; OPXY(0x8d): // ADC A,LX val = data->B.L; - USE_CYCLES(1) goto OP_ADC; OPXY(0x8e): // ADC A,(IX+o) @@ -356,12 +354,10 @@ switch (Opcode) OPXY(0x94): // SUB HX val = data->B.H; - USE_CYCLES(1) goto OP_SUB; OPXY(0x95): // SUB LX val = data->B.L; - USE_CYCLES(1) goto OP_SUB; OPXY(0x96): // SUB (IX+o) @@ -386,12 +382,10 @@ switch (Opcode) OPXY(0x9c): // SBC A,HX val = data->B.H; - USE_CYCLES(1) goto OP_SBC; OPXY(0x9d): // SBC A,LX val = data->B.L; - USE_CYCLES(1) goto OP_SBC; OPXY(0x9e): // SBC A,(IX+o) @@ -416,12 +410,10 @@ switch (Opcode) OPXY(0xbc): // CP HX val = data->B.H; - USE_CYCLES(1) goto 
OP_CP; OPXY(0xbd): // CP LX val = data->B.L; - USE_CYCLES(1) goto OP_CP; OPXY(0xbe): // CP (IX+o) @@ -446,12 +438,10 @@ switch (Opcode) OPXY(0xa4): // AND HX val = data->B.H; - USE_CYCLES(1) goto OP_AND; OPXY(0xa5): // AND LX val = data->B.L; - USE_CYCLES(1) goto OP_AND; OPXY(0xa6): // AND (IX+o) @@ -476,12 +466,10 @@ switch (Opcode) OPXY(0xac): // XOR HX val = data->B.H; - USE_CYCLES(1) goto OP_XOR; OPXY(0xad): // XOR LX val = data->B.L; - USE_CYCLES(1) goto OP_XOR; OPXY(0xae): // XOR (IX+o) @@ -506,12 +494,10 @@ switch (Opcode) OPXY(0xb4): // OR HX val = data->B.H; - USE_CYCLES(1) goto OP_OR; OPXY(0xb5): // OR LX val = data->B.L; - USE_CYCLES(1) goto OP_OR; OPXY(0xb6): // OR (IX+o) diff --git a/cpu/cz80/cz80macro.h b/cpu/cz80/cz80macro.h index 5adca13a..edd6d9dd 100644 --- a/cpu/cz80/cz80macro.h +++ b/cpu/cz80/cz80macro.h @@ -48,11 +48,7 @@ #define READ_OP() GET_OP(); PC++ #define READ_ARG() (*(UINT8 *)PC++) -#if CZ80_LITTLE_ENDIAN #define READ_ARG16() (*(UINT8 *)PC | (*(UINT8 *)(PC + 1) << 8)); PC += 2 -#else -#define READ_ARG16() (*(UINT8 *)(PC + 1) | (*(UINT8 *)PC << 8)); PC += 2 -#endif //#ifndef BUILD_CPS1PSP //#define READ_MEM8(A) memory_region_cpu2[(A)] @@ -63,17 +59,13 @@ #define READ_MEM8(A) CPU->Read_Byte(A) #endif //#endif -#if CZ80_LITTLE_ENDIAN #define READ_MEM16(A) (READ_MEM8(A) | (READ_MEM8((A) + 1) << 8)) -#else -#define READ_MEM16(A) ((READ_MEM8(A) << 8) | READ_MEM8((A) + 1)) -#endif #if PICODRIVE_HACKS #define WRITE_MEM8(A, D) { \ unsigned short a = A; \ unsigned char d = D; \ - unsigned long v = z80_write_map[a >> Z80_MEM_SHIFT]; \ + uptr v = z80_write_map[a >> Z80_MEM_SHIFT]; \ if (map_flag_set(v)) \ ((z80_write_f *)(v << 1))(a, d); \ else \ @@ -82,11 +74,7 @@ #else #define WRITE_MEM8(A, D) CPU->Write_Byte(A, D); #endif -#if CZ80_LITTLE_ENDIAN #define WRITE_MEM16(A, D) { WRITE_MEM8(A, D); WRITE_MEM8((A) + 1, (D) >> 8); } -#else -#define WRITE_MEM16(A, D) { WRITE_MEM8((A) + 1, D); WRITE_MEM8(A, (D) >> 8); } -#endif #define PUSH_16(A) { UINT32 
sp; zSP -= 2; sp = zSP; WRITE_MEM16(sp, A); } #define POP_16(A) { UINT32 sp; sp = zSP; A = READ_MEM16(sp); zSP = sp + 2; } @@ -94,36 +82,36 @@ #define IN(A) CPU->IN_Port(A) #define OUT(A, D) CPU->OUT_Port(A, D) -#define CHECK_INT \ - if (zIFF1) \ - { \ - UINT32 IntVect; \ - \ - if (CPU->IRQState == HOLD_LINE) \ - CPU->IRQState = CLEAR_LINE; \ - \ - CPU->HaltState = 0; \ - zIFF1 = zIFF2 = 0; \ - IntVect = CPU->Interrupt_Callback(CPU->IRQLine); \ - \ - PUSH_16(zRealPC) \ - \ - if (zIM == 2) \ - { \ - IntVect = (IntVect & 0xff) | (zI << 8); \ - PC = READ_MEM16(IntVect); \ - CPU->ExtraCycles += 17; \ - } \ - else if (zIM == 1) \ - { \ - PC = 0x38; \ - CPU->ExtraCycles += 13; \ - } \ - else \ - { \ - PC = IntVect & 0x38; \ - CPU->ExtraCycles += 13; \ - } \ - \ - SET_PC(PC) \ +#define CHECK_INT \ + if (zIFF1) \ + { \ + UINT32 IntVect; \ + \ + if (CPU->IRQState == HOLD_LINE) \ + CPU->IRQState = CLEAR_LINE; \ + \ + CPU->Status &= ~(CZ80_HALTED|CZ80_HAS_INT); \ + zIFF1 = zIFF2 = 0; \ + IntVect = CPU->Interrupt_Callback(CPU->IRQLine); \ + \ + PUSH_16(zRealPC) \ + \ + if (zIM == 2) \ + { \ + IntVect = (IntVect & 0xff) | (zI << 8); \ + PC = READ_MEM16(IntVect); \ + CPU->ExtraCycles += 17; \ + } \ + else if (zIM == 1) \ + { \ + PC = 0x38; \ + CPU->ExtraCycles += 13; \ + } \ + else \ + { \ + PC = IntVect & 0x38; \ + CPU->ExtraCycles += 13; \ + } \ + \ + SET_PC(PC) \ } diff --git a/cpu/cz80/readme.txt b/cpu/cz80/readme.txt new file mode 100644 index 00000000..2a5d6b06 --- /dev/null +++ b/cpu/cz80/readme.txt @@ -0,0 +1,246 @@ +************************************************ +* * +* CZ80 (Z80 CPU emulator) version 0.91 * +* Compiled with Dev-C++ * +* Copyright 2004-2005 Stphane Dallongeville * +* * +************************************************ + +CZ80 is a Z80 CPU emulator, priorities were given to : +- code size +- speed +- accuracy +- portablity + +It supports almost all undocumented opcodes and flags. 
+ +The emulator can be freely distributed and used for any non-commercial +project as long as you don't forget to credit me somewhere :) +If you want some support about the CZ80, you can contact me on +the Gens forum (http://gens.consolemul.com then go to the forum). + + +You should find the following files in the emulation pack : +- cz80.h -> header file (prototypes, declarations...) +- cz80.c -> contains emulation core itself +- cz80.inc -> contains most used macros +- cz80jmp.inc -> Jump table definition when Jump Table used +- cz80exec.inc -> contains the major Cz80_Exec(...) function +- cz80_op.inc -> contains code for simple Z80 opcodes emulation +- cz80_opCB.inc -> contains code for CB prefixed Z80 opcodes emulation +- cz80_opED.inc -> contains code for ED prefixed Z80 opcodes emulation +- cz80_opXY.inc -> contains code for DD/FD prefixed Z80 opcodes emulation +- cz80_opXYCB.inc -> contains code for DD/FD + CB prefixed Z80 opcodes emulation +- readme.txt -> the current file you're reading ;) + + +* How to compile the emulator ? +******************************* + +The emulator has been written with Dev-CPP 4.9.X.X +You may need to modify the u8, u16, u32, s8, s16, s32 and FASTCALL +definitions (cz80.h) according to your C compiler and the target system. +Then compile the cz80.c file, you should obtain a cz80.o (or cz80.obj) file... +at this point, you're ready to use the emulator just by linking the file in your project :) + + +* How to use the emulator ? +*************************** + +1) Include the header file in your source : +------------------------------------------ + +#include "cz80.h" + + +2) Init the CZ80 core : +----------------------- + +If you want to use the internal CZ80 context offered : + + Cz80_Init(&CZ80); + +but you can also define your own CZ80 context : + + cz80_struc My_Z80; + + .... + + Cz80_Init(&My_Z80); + + +You can emulate as many Z80 CPUs as you want by defining several CZ80 contexts.
+ + +3) Set up your fetch region (where the Z80 will run code from) : +---------------------------------------------------------------- + + Cz80_Set_Fetch(&CZ80, 0x0000, 0x7FFF, (u32) your_ROM); + Cz80_Set_Fetch(&CZ80, 0xA000, 0xFFFF, (u32) your_RAM); + ... + + +4) Set up your memory (where the Z80 will read and write data) : +---------------------------------------------------------------- + + Cz80_Set_ReadB(&CZ80, your_z80readbyte_function); + Cz80_Set_WriteB(&CZ80, your_z80writebyte_function); + +You can improve CZ80 performance by using WORD read/write functions. +For that, you need to enable the 'CZ80_USE_WORD_HANDLER' define in the cz80.h file. +In this case, you'll need to add this : + +#if CZ80_USE_WORD_HANDLER + Cz80_Set_ReadW(&CZ80, your_z80readword_function); + Cz80_Set_WriteW(&CZ80, your_z80writeword_function); +#endif + +Your read function needs to be of CZ80_READ type : +typedef u32 FASTCALL CZ80_READ(u32 adr); + +Your write function needs to be of CZ80_WRITE type : +typedef void FASTCALL CZ80_WRITE(u32 adr, u32 data); + + +5) Set Up your port (where the Z80 will read and write IO data) : +----------------------------------------------------------------- + + Cz80_Set_INPort(&CZ80, your_z80readport_function); + Cz80_Set_OUTPort(&CZ80, your_z80writeport_function); + +Your readPort function needs to be of CZ80_READ type : +typedef u32 FASTCALL CZ80_READ(u32 adr); + +Your writePort function needs to be of CZ80_WRITE type : +typedef void FASTCALL CZ80_WRITE(u32 adr, u32 data); + + +6) Set Up your interrupt callback function : +-------------------------------------------- + + Cz80_Set_IRQ_Callback(&CZ80, your_z80irqcallback_function); + +Your IRQ callback function needs to be of CZ80_INT_CALLBACK type : +typedef s32 FASTCALL CZ80_INT_CALLBACK(s32 param); + +If you don't understand what I am talking about here, just ignore it... +it's not needed in most cases.
+ + +7) Set Up your RETI callback function : +--------------------------------------- + + Cz80_Set_RETI_Callback(&CZ80, your_z80reticallback_function); + +Your RETI callback function needs to be of CZ80_RETI_CALLBACK type : +typedef void FASTCALL CZ80_RETI_CALLBACK(); + +Again, if you don't understand what I am talking about here, ignore it... + + +8) Reset the CZ80 core before first use : +----------------------------------------- + + Cz80_Reset(&CZ80); + + +9) Do some cycles : +------------------- + +Then it's time to really do some work, if you want to execute 1000 cycles, just do : + + cycles_done = Cz80_Exec(&CZ80, 1000); + +The Cz80_Exec function returns the number of cycles actually done. +Since each instruction takes more than 1 cycle, Cz80_Exec will execute a bit more than +you requested, for instance here, it can return 1008 cycles instead of 1000. +In this case, adjust the number of cycles to do like this : + + cycles_by_frame = 4800; + extra_cycles = 0; + while (true) + { + ... + extra_cycles = Cz80_Exec(&CZ80, cycles_by_frame - extra_cycles) - cycles_by_frame; + ... + } + +If Cz80_Exec returns a negative value, an error occurred. + + +10) Do an interrupt request : +---------------------------- + + Cz80_Set_IRQ(&CZ80, 0); + +or for an NMI : + + Cz80_Set_NMI(&CZ80); + + +11) Cancel an interrupt request : +--------------------------------- + + Cz80_Clear_IRQ(&CZ80); + +or for an NMI : + + Cz80_Clear_NMI(&CZ80); + + + +* Switches +********** + +There are several switches in the cz80.h file which permit you to configure +CZ80 depending on your needs. + +- CZ80_FETCH_BITS (default = 4) + +This defines the number of bits used to select the fetch region. +This value must be 4 <= X <= 12 +A greater value permits more fetch regions. +In most cases, 4 is enough, but if you have fetch regions smaller than 0x1000 bytes, +increase this value. + +- CZ80_LITTLE_ENDIAN + +Defines the endianness of the target platform. +x86 CPUs use Little Endian.
+ +- CZ80_USE_JUMPTABLE + +Set it to 1 to use Jump table instead of a big case statement. +This can bring some small speed improvement. +Be careful, some compilers don't support this (computed labels) so it's +safer to not use it. + +- CZ80_SIZE_OPT + +Add some extra optimisations for the code size versus speed. +Minor changes anyway... + +- CZ80_USE_WORD_HANDLER + +See the "Set Up Memory" section for more detail. + +- CZ80_EXACT + +Enable accurate emulation of extended undocumented opcodes and flags. +Minor speed decrease when activated. +Even without that flag, CZ80 is already quite accurate, keep it +disabled unless you need it or if speed isn't important for you. + +- CZ80_DEBUG + +Used by me, keep it disabled :p + + + +* History +********* + +Version 0.90 : +-------------- + +* Initial release for debugging purposes ^^ diff --git a/cpu/debug.c b/cpu/debug.c index a464413b..a1d7e117 100644 --- a/cpu/debug.c +++ b/cpu/debug.c @@ -8,7 +8,7 @@ #define _GNU_SOURCE #include -#include "../pico/pico_int.h" +#include #include "debug.h" static char pdb_pending_cmds[128]; diff --git a/cpu/debug.h b/cpu/debug.h index 5aaa60ee..63728ad6 100644 --- a/cpu/debug.h +++ b/cpu/debug.h @@ -26,6 +26,6 @@ int pdb_net_connect(const char *host, const char *port); #else -static inline int pdb_net_connect(const char *host, const char *port) {return 0;} +static __inline int pdb_net_connect(const char *host, const char *port) {return 0;} #endif diff --git a/cpu/drc/cmn.c b/cpu/drc/cmn.c index 37f17ce9..65ffd043 100644 --- a/cpu/drc/cmn.c +++ b/cpu/drc/cmn.c @@ -10,17 +10,29 @@ #include #include "cmn.h" -u8 __attribute__((aligned(4096))) tcache[DRC_TCACHE_SIZE]; - +#if defined(__linux__) && (defined(__aarch64__) || defined(__VFP_FP__)) +// might be running on a 64k-page kernel +#define PICO_PAGE_ALIGN 65536 +#else +#define PICO_PAGE_ALIGN 4096 +#endif +u8 ALIGNED(PICO_PAGE_ALIGN) tcache_default[DRC_TCACHE_SIZE]; +u8 *tcache; void drc_cmn_init(void) { - int ret = plat_mem_set_exec(tcache, 
sizeof(tcache)); + int ret; + + tcache = plat_mem_get_for_drc(DRC_TCACHE_SIZE); + if (tcache == NULL) + tcache = tcache_default; + + ret = plat_mem_set_exec(tcache, DRC_TCACHE_SIZE); elprintf(EL_STATUS, "drc_cmn_init: %p, %zd bytes: %d", - tcache, sizeof(tcache), ret); + tcache, DRC_TCACHE_SIZE, ret); #ifdef __arm__ - if (PicoOpt & POPT_EN_DRC) + if (PicoIn.opt & POPT_EN_DRC) { static int test_done; if (!test_done) diff --git a/cpu/drc/cmn.h b/cpu/drc/cmn.h index 4737b74d..9c041e70 100644 --- a/cpu/drc/cmn.h +++ b/cpu/drc/cmn.h @@ -1,12 +1,44 @@ -typedef unsigned char u8; -typedef signed char s8; -typedef unsigned short u16; -typedef unsigned int u32; -#define DRC_TCACHE_SIZE (2*1024*1024) +#define DRC_TCACHE_SIZE (4*1024*1024) -extern u8 tcache[DRC_TCACHE_SIZE]; +extern u8 *tcache; void drc_cmn_init(void); void drc_cmn_cleanup(void); +#define BITMASK1(v0) (1 << (v0)) +#define BITMASK2(v0,v1) ((1 << (v0)) | (1 << (v1))) +#define BITMASK3(v0,v1,v2) (BITMASK2(v0,v1) | (1 << (v2))) +#define BITMASK4(v0,v1,v2,v3) (BITMASK3(v0,v1,v2) | (1 << (v3))) +#define BITMASK5(v0,v1,v2,v3,v4) (BITMASK4(v0,v1,v2,v3) | (1 << (v4))) +#define BITMASK6(v0,v1,v2,v3,v4,v5) (BITMASK5(v0,v1,v2,v3,v4) | (1 << (v5))) +#define BITRANGE(v0,v1) (BITMASK1(v1+1)-BITMASK1(v0)) // set with v0..v1 + +// binary search approach, since we don't have CLZ on ARM920T +#define FOR_ALL_BITS_SET_DO(mask, bit, code) { \ + u32 __mask = mask; \ + for (bit = 0; bit < 32 && mask; bit++, __mask >>= 1) { \ + if (!(__mask & 0xffff)) \ + bit += 16,__mask >>= 16; \ + if (!(__mask & 0xff)) \ + bit += 8, __mask >>= 8; \ + if (!(__mask & 0xf)) \ + bit += 4, __mask >>= 4; \ + if (!(__mask & 0x3)) \ + bit += 2, __mask >>= 2; \ + if (!(__mask & 0x1)) \ + bit += 1, __mask >>= 1; \ + if (__mask & 0x1) { \ + code; \ + } \ + } \ +} + +// inspired by https://graphics.stanford.edu/~seander/bithacks.html +static inline int count_bits(unsigned val) +{ + val = val - ((val >> 1) & 0x55555555); + val = (val & 0x33333333) + ((val >> 2) 
& 0x33333333); + return (((val + (val >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; +} + diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 64face12..3d1184dd 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -1,33 +1,192 @@ /* * Basic macros to emit ARM instructions and some utils * Copyright (C) 2008,2009,2010 notaz + * Copyright (C) 2019-2024 irixxxx * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. */ -#define CONTEXT_REG 11 +#define HOST_REGS 16 + +// OABI/EABI: params: r0-r3, return: r0-r1, temp: r12,r14, saved: r4-r8,r10,r11 +// SP,PC: r13,r15 must not be used. saved: r9 (for platform use, e.g. on ios) +#define RET_REG 0 +#define PARAM_REGS { 0, 1, 2, 3 } +#ifndef __MACH__ +#define PRESERVED_REGS { 4, 5, 6, 7, 8, 9, 10, 11 } +#else +#define PRESERVED_REGS { 4, 5, 6, 7, 8, 10, 11 } // no r9.. +#endif +#define TEMPORARY_REGS { 12, 14 } + +#define CONTEXT_REG 11 +#define STATIC_SH2_REGS { SHR_SR,10 , SHR_R(0),8 , SHR_R(1),9 } // XXX: tcache_ptr type for SVP and SH2 compilers differs.. 
#define EMIT_PTR(ptr, x) \ do { \ *(u32 *)ptr = x; \ ptr = (void *)((u8 *)ptr + sizeof(u32)); \ - COUNT_OP; \ } while (0) -#define EMIT(x) EMIT_PTR(tcache_ptr, x) +// ARM special registers and peephole optimization flags +#define SP 13 // stack pointer +#define LR 14 // link (return address) +#define PC 15 // program counter +#define SR 16 // CPSR, status register +#define MEM 17 // memory access (src=LDR, dst=STR) +#define CYC1 20 // 1 cycle interlock (LDR, reg-cntrld shift) +#define CYC2 (CYC1+1)// 2+ cycles interlock (LDR[BH], MUL/MLA etc) +#define NO 32 // token for "no register" -#define A_R4M (1 << 4) -#define A_R5M (1 << 5) -#define A_R6M (1 << 6) -#define A_R7M (1 << 7) -#define A_R8M (1 << 8) -#define A_R9M (1 << 9) -#define A_R10M (1 << 10) -#define A_R11M (1 << 11) -#define A_R12M (1 << 12) -#define A_R14M (1 << 14) -#define A_R15M (1 << 15) +// bitmask builders +#define M1(x) (u32)(1ULL<<(x)) // u32 to have NO evaluate to 0 +#define M2(x,y) (M1(x)|M1(y)) +#define M3(x,y,z) (M2(x,y)|M1(z)) +#define M4(x,y,z,a) (M3(x,y,z)|M1(a)) +#define M5(x,y,z,a,b) (M4(x,y,z,a)|M1(b)) +#define M6(x,y,z,a,b,c) (M5(x,y,z,a,b)|M1(c)) +#define M10(a,b,c,d,e,f,g,h,i,j) (M5(a,b,c,d,e)|M5(f,g,h,i,j)) + +// avoid a warning with clang +static inline uintptr_t pabs(intptr_t v) { return labs(v); } + +// sys_cacheflush always flushes whole pages, and it's rather expensive on ARMs +// hold a list of pending cache updates and merge requests to reduce cacheflush +static struct { void *base, *end; } pageflush[4]; +static unsigned pagesize = 4096; + +static void emith_update_cache(void) +{ + int i; + + for (i = 0; i < 4 && pageflush[i].base; i++) { + cache_flush_d_inval_i(pageflush[i].base, pageflush[i].end + pagesize-1); + pageflush[i].base = NULL; + } +} + +static inline void emith_update_add(void *base, void *end) +{ + void *p_base = (void *)((uintptr_t)(base) & ~(pagesize-1)); + void *p_end = (void *)((uintptr_t)(end ) & ~(pagesize-1)); + int i; + + for (i = 0; i < 4 && 
pageflush[i].base; i++) { + if (p_base <= pageflush[i].end+pagesize && p_end >= pageflush[i].end) { + if (p_base < pageflush[i].base) pageflush[i].base = p_base; + pageflush[i].end = p_end; + return; + } + if (p_base <= pageflush[i].base && p_end >= pageflush[i].base-pagesize) { + if (p_end > pageflush[i].end) pageflush[i].end = p_end; + pageflush[i].base = p_base; + return; + } + } + if (i == 4) { + /* list full and not mergeable -> flush list */ + emith_update_cache(); + i = 0; + } + pageflush[i].base = p_base, pageflush[i].end = p_end; +} + +// peephole optimizer. ATM only tries to reduce interlock +#define EMIT_CACHE_SIZE 6 +struct emit_op { + u32 op; + u32 src, dst; +}; + +// peephole cache, last commited insn + cache + next insn = size+2 +static struct emit_op emit_cache[EMIT_CACHE_SIZE+2]; +static int emit_index; +#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr-emit_index) + +static inline void emith_pool_adjust(int tcache_offs, int move_offs); + +static NOINLINE void EMIT(u32 op, u32 dst, u32 src) +{ + void * emit_ptr = (u32 *)tcache_ptr - emit_index; + struct emit_op *const ptr = emit_cache; + const int n = emit_index+1; + int i, bi, bd = 0; + + // account for new insn in tcache + tcache_ptr = (void *)((u32 *)tcache_ptr + 1); + COUNT_OP; + // for conditional execution SR is always source + if (op < 0xe0000000 /*A_COND_AL << 28*/) + src |= M1(SR); + // put insn on back of queue // mask away the NO token + emit_cache[n] = (struct emit_op) + { .op=op, .src=src & ~M1(NO), .dst=dst & ~M1(NO) }; + // check insns down the queue as long as permitted by dependencies + for (bd = bi = 0, i = emit_index; i > 1 && !(dst & M1(PC)); i--) { + int deps = 0; + // dst deps between i and n must not be swapped, since any deps + // but [i].src & [n].src lead to changed semantics if swapped. 
+ if ((ptr[i].dst & ptr[n].src) || (ptr[n].dst & ptr[i].src) || + (ptr[i].dst & ptr[n].dst)) + break; + // don't swap insns reading PC if it's not a word pool load + // (ptr[i].op&0xf700000) != EOP_C_AM2_IMM(0,0,0,1,0,0,0)) + if ((ptr[i].src & M1(PC)) && (ptr[i].op&0xf700000) != 0x5100000) + break; + + // calculate ARM920T interlock cycles (differences only) +#define D2(x,y) ((ptr[x].dst & ptr[y].src)?((ptr[x].src >> CYC2) & 1):0) +#define D1(x,y) ((ptr[x].dst & ptr[y].src)?((ptr[x].src >> CYC1) & 3):0) + // insn sequence: [..., i-2, i-1, i, i+1, ..., n-2, n-1, n] + deps -= D2(i-2,i)+D2(i-1,i+1)+D2(n-2,n ) + D1(i-1,i)+D1(n-1,n); + deps -= !!(ptr[n].src & M2(CYC1,CYC2));// favour moving LDR down + // insn sequence: [..., i-2, i-1, n, i, i+1, ..., n-2, n-1] + deps += D2(i-2,n)+D2(i-1,i )+D2(n ,i+1) + D1(i-1,n)+D1(n ,i); + deps += !!(ptr[i].src & M2(CYC1,CYC2));// penalize moving LDR up + // remember best match found + if (bd > deps) + bd = deps, bi = i; + } + // swap if fewer depencies + if (bd < 0) { + // make room for new insn at bi + struct emit_op tmp = ptr[n]; + for (i = n-1; i >= bi; i--) { + ptr[i+1] = ptr[i]; + if (ptr[i].src & M1(PC)) + emith_pool_adjust(n-i+1, 1); + } + // insert new insn at bi + ptr[bi] = tmp; + if (ptr[bi].src & M1(PC)) + emith_pool_adjust(1, bi-n); + } + if (dst & M1(PC)) { + // commit everything if a branch insn is emitted + for (i = 1; i <= emit_index+1; i++) + EMIT_PTR(emit_ptr, emit_cache[i].op); + emit_index = 0; + } else if (emit_index < EMIT_CACHE_SIZE) { + // queue not yet full + emit_index++; + } else { + // commit oldest insn from cache + EMIT_PTR(emit_ptr, emit_cache[1].op); + for (i = 0; i <= emit_index; i++) + emit_cache[i] = emit_cache[i+1]; + } +} + +static void emith_flush(void) +{ + int i; + void *emit_ptr = tcache_ptr - emit_index*sizeof(u32); + + for (i = 1; i <= emit_index; i++) + EMIT_PTR(emit_ptr, emit_cache[i].op); + emit_index = 0; +} #define A_COND_AL 0xe #define A_COND_EQ 0x0 @@ -46,6 +205,7 @@ #define A_COND_LE 
0xd #define A_COND_CS A_COND_HS #define A_COND_CC A_COND_LO +#define A_COND_NV 0xf // Not Valid (aka NeVer :-) - ATTN: not a real condition! /* unified conditions */ #define DCOND_EQ A_COND_EQ @@ -63,6 +223,9 @@ #define DCOND_VS A_COND_VS #define DCOND_VC A_COND_VC +#define DCOND_CS A_COND_HS +#define DCOND_CC A_COND_LO + /* addressing mode 1 */ #define A_AM1_LSL 0 #define A_AM1_LSR 1 @@ -85,18 +248,26 @@ #define A_OP_TST 0x8 #define A_OP_TEQ 0x9 #define A_OP_CMP 0xa -#define A_OP_CMN 0xa +#define A_OP_CMN 0xb #define A_OP_ORR 0xc #define A_OP_MOV 0xd #define A_OP_BIC 0xe #define A_OP_MVN 0xf -#define EOP_C_DOP_X(cond,op,s,rn,rd,shifter_op) \ - EMIT(((cond)<<28) | ((op)<< 21) | ((s)<<20) | ((rn)<<16) | ((rd)<<12) | (shifter_op)) +// operation specific register usage in DOP +#define A_Rn(op,rn) (((op)&0xd)!=0xd ? rn:NO) // no rn for MOV,MVN +#define A_Rd(op,rd) (((op)&0xc)!=0x8 ? rd:NO) // no rd for TST,TEQ,CMP,CMN +// CSPR is dst if S set, CSPR is src if op is ADC/SBC/RSC or shift is RRX +#define A_Sd(s) ((s) ? SR:NO) +#define A_Sr(op,sop) (((op)>=0x5 && (op)<=0x7) || (sop)>>4==A_AM1_ROR<<1 ? 
SR:NO) -#define EOP_C_DOP_IMM( cond,op,s,rn,rd,ror2,imm8) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_IMM(ror2,imm8)) -#define EOP_C_DOP_REG_XIMM(cond,op,s,rn,rd,shift_imm,shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XIMM(shift_imm,shift_op,rm)) -#define EOP_C_DOP_REG_XREG(cond,op,s,rn,rd,rs, shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XREG(rs, shift_op,rm)) +#define EOP_C_DOP_X(cond,op,s,rn,rd,sop,rm,rs) \ + EMIT(((cond)<<28) | ((op)<< 21) | ((s)<<20) | ((rn)<<16) | ((rd)<<12) | (sop), \ + M2(A_Rd(op,rd),A_Sd(s)), M5(A_Sr(op,sop),A_Rn(op,rn),rm,rs,rs==NO?NO:CYC1)) + +#define EOP_C_DOP_IMM( cond,op,s,rn,rd,ror2,imm8) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_IMM(ror2,imm8), NO, NO) +#define EOP_C_DOP_REG_XIMM(cond,op,s,rn,rd,shift_imm,shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XIMM(shift_imm,shift_op,rm), rm, NO) +#define EOP_C_DOP_REG_XREG(cond,op,s,rn,rd,rs, shift_op,rm) EOP_C_DOP_X(cond,op,s,rn,rd,A_AM1_REG_XREG(rs, shift_op,rm), rm, rs) #define EOP_MOV_IMM(rd, ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_MOV,0, 0,rd,ror2,imm8) #define EOP_MVN_IMM(rd, ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_MVN,0, 0,rd,ror2,imm8) @@ -156,135 +327,243 @@ /* addressing mode 2 */ #define EOP_C_AM2_IMM(cond,u,b,l,rn,rd,offset_12) \ - EMIT(((cond)<<28) | 0x05000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | (offset_12)) + EMIT(((cond)<<28) | 0x05000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \ + ((offset_12) & 0xfff), M1(l?rd:MEM), M3(rn,l?MEM:rd,l?b?CYC2:CYC1:NO)) #define EOP_C_AM2_REG(cond,u,b,l,rn,rd,shift_imm,shift_op,rm) \ EMIT(((cond)<<28) | 0x07000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \ - ((shift_imm)<<7) | ((shift_op)<<5) | (rm)) + A_AM1_REG_XIMM(shift_imm, shift_op, rm), M1(l?rd:MEM), M4(rn,rm,l?MEM:rd,l?b?CYC2:CYC1:NO)) /* addressing mode 3 */ #define EOP_C_AM3(cond,u,r,l,rn,rd,s,h,immed_reg) \ EMIT(((cond)<<28) | 0x01000090 | ((u)<<23) | ((r)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \ - ((s)<<6) | 
((h)<<5) | (immed_reg)) + ((s)<<6) | ((h)<<5) | (immed_reg), M1(l?rd:MEM), M4(rn,r?NO:immed_reg,l?MEM:rd,l?CYC2:NO)) #define EOP_C_AM3_IMM(cond,u,l,rn,rd,s,h,offset_8) EOP_C_AM3(cond,u,1,l,rn,rd,s,h,(((offset_8)&0xf0)<<4)|((offset_8)&0xf)) #define EOP_C_AM3_REG(cond,u,l,rn,rd,s,h,rm) EOP_C_AM3(cond,u,0,l,rn,rd,s,h,rm) /* ldr and str */ -#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,0,1,rn,rd,offset_12) -#define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,1,1,rn,rd,offset_12) +#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,1,rn,rd,pabs(offset_12)) +#define EOP_LDRB_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,1,1,rn,rd,pabs(offset_12)) +#define EOP_STR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,(offset_12) >= 0,0,0,rn,rd,pabs(offset_12)) -#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,offset_12) -#define EOP_LDR_NEGIMM(rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,0,0,1,rn,rd,offset_12) +#define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,1,rn,rd,pabs(offset_12)) #define EOP_LDR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,0) -#define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,offset_12) +#define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,(offset_12) >= 0,0,0,rn,rd,pabs(offset_12)) #define EOP_STR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,0) #define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm) +#define EOP_LDR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,3,rn,rd,shift_imm,A_AM1_LSL,rm) +#define EOP_LDRB_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,1,1,rn,rd,shift_imm,A_AM1_LSL,rm) +#define EOP_STR_REG_LSL_WB(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,2,rn,rd,shift_imm,A_AM1_LSL,rm) -#define EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,1,1,rn,rd,0,1,offset_8) +#define 
EOP_LDRH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,0,1,pabs(offset_8)) +#define EOP_LDRH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,0,1,rm) -#define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,offset_8) +#define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,1,rn,rd,0,1,pabs(offset_8)) #define EOP_LDRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,0) #define EOP_LDRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,1,rn,rd,0,1,rm) -#define EOP_STRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,offset_8) +#define EOP_STRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,(offset_8) >= 0,0,rn,rd,0,1,pabs(offset_8)) #define EOP_STRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,0,rn,rd,0,1,0) #define EOP_STRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,0,rn,rd,0,1,rm) +#define EOP_LDRSB_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,0,pabs(offset_8)) +#define EOP_LDRSB_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,1,0,rm) +#define EOP_LDRSH_IMM2(cond,rd,rn,offset_8) EOP_C_AM3_IMM(cond,(offset_8) >= 0,1,rn,rd,1,1,pabs(offset_8)) +#define EOP_LDRSH_REG2(cond,rd,rn,rm) EOP_C_AM3_REG(cond,1,1,rn,rd,1,1,rm) + /* ldm and stm */ #define EOP_XXM(cond,p,u,s,w,l,rn,list) \ - EMIT(((cond)<<28) | (1<<27) | ((p)<<24) | ((u)<<23) | ((s)<<22) | ((w)<<21) | ((l)<<20) | ((rn)<<16) | (list)) + EMIT(((cond)<<28) | (1<<27) | ((p)<<24) | ((u)<<23) | ((s)<<22) | ((w)<<21) | ((l)<<20) | ((rn)<<16) | (list), \ + M2(rn,l?NO:MEM)|(l?list:0), M3(rn,l?MEM:NO,l?CYC2:NO)|(l?0:list)) #define EOP_STMIA(rb,list) EOP_XXM(A_COND_AL,0,1,0,0,0,rb,list) #define EOP_LDMIA(rb,list) EOP_XXM(A_COND_AL,0,1,0,0,1,rb,list) -#define EOP_STMFD_SP(list) EOP_XXM(A_COND_AL,1,0,0,1,0,13,list) -#define EOP_LDMFD_SP(list) EOP_XXM(A_COND_AL,0,1,0,1,1,13,list) +#define EOP_STMFD_SP(list) EOP_XXM(A_COND_AL,1,0,0,1,0,SP,list) +#define EOP_LDMFD_SP(list) EOP_XXM(A_COND_AL,0,1,0,1,1,SP,list) /* branches */ #define 
EOP_C_BX(cond,rm) \ - EMIT(((cond)<<28) | 0x012fff10 | (rm)) + EMIT(((cond)<<28) | 0x012fff10 | (rm), M1(PC), M1(rm)) #define EOP_C_B_PTR(ptr,cond,l,signed_immed_24) \ EMIT_PTR(ptr, ((cond)<<28) | 0x0a000000 | ((l)<<24) | (signed_immed_24)) #define EOP_C_B(cond,l,signed_immed_24) \ - EOP_C_B_PTR(tcache_ptr,cond,l,signed_immed_24) + EMIT(((cond)<<28) | 0x0a000000 | ((l)<<24) | (signed_immed_24), M2(PC,l?LR:NO), M1(PC)) #define EOP_B( signed_immed_24) EOP_C_B(A_COND_AL,0,signed_immed_24) #define EOP_BL(signed_immed_24) EOP_C_B(A_COND_AL,1,signed_immed_24) /* misc */ #define EOP_C_MUL(cond,s,rd,rs,rm) \ - EMIT(((cond)<<28) | ((s)<<20) | ((rd)<<16) | ((rs)<<8) | 0x90 | (rm)) + EMIT(((cond)<<28) | ((s)<<20) | ((rd)<<16) | ((rs)<<8) | 0x90 | (rm), M2(rd,s?SR:NO), M3(rs,rm,CYC2)) #define EOP_C_UMULL(cond,s,rdhi,rdlo,rs,rm) \ - EMIT(((cond)<<28) | 0x00800000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm)) + EMIT(((cond)<<28) | 0x00800000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M4(rs,rm,CYC1,CYC2)) #define EOP_C_SMULL(cond,s,rdhi,rdlo,rs,rm) \ - EMIT(((cond)<<28) | 0x00c00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm)) + EMIT(((cond)<<28) | 0x00c00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M4(rs,rm,CYC1,CYC2)) #define EOP_C_SMLAL(cond,s,rdhi,rdlo,rs,rm) \ - EMIT(((cond)<<28) | 0x00e00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm)) + EMIT(((cond)<<28) | 0x00e00000 | ((s)<<20) | ((rdhi)<<16) | ((rdlo)<<12) | ((rs)<<8) | 0x90 | (rm), M3(rdhi,rdlo,s?SR:NO), M6(rs,rm,rdlo,rdhi,CYC1,CYC2)) #define EOP_MUL(rd,rm,rs) EOP_C_MUL(A_COND_AL,0,rd,rs,rm) // note: rd != rm #define EOP_C_MRS(cond,rd) \ - EMIT(((cond)<<28) | 0x010f0000 | ((rd)<<12)) + EMIT(((cond)<<28) | 0x010f0000 | ((rd)<<12), M1(rd), M1(SR)) #define EOP_C_MSR_IMM(cond,ror2,imm) \ - EMIT(((cond)<<28) | 0x0328f000 | ((ror2)<<8) | (imm)) // cpsr_f + 
EMIT(((cond)<<28) | 0x0328f000 | ((ror2)<<8) | (imm), M1(SR), 0) // cpsr_f #define EOP_C_MSR_REG(cond,rm) \ - EMIT(((cond)<<28) | 0x0128f000 | (rm)) // cpsr_f + EMIT(((cond)<<28) | 0x0128f000 | (rm), M1(SR), M1(rm)) // cpsr_f #define EOP_MRS(rd) EOP_C_MRS(A_COND_AL,rd) #define EOP_MSR_IMM(ror2,imm) EOP_C_MSR_IMM(A_COND_AL,ror2,imm) #define EOP_MSR_REG(rm) EOP_C_MSR_REG(A_COND_AL,rm) +#define EOP_MOVW(cond,rd,imm) \ + EMIT(((cond)<<28) | 0x03000000 | ((rd)<<12) | ((imm)&0xfff) | (((imm)<<4)&0xf0000), M1(rd), NO) -// XXX: AND, RSB, *C, will break if 1 insn is not enough +#define EOP_MOVT(cond,rd,imm) \ + EMIT(((cond)<<28) | 0x03400000 | ((rd)<<12) | (((imm)>>16)&0xfff) | (((imm)>>12)&0xf0000), M1(rd), NO) + +// host literal pool; must be significantly smaller than 1024 (max LDR offset = 4096) +#define MAX_HOST_LITERALS 128 +static u32 literal_pool[MAX_HOST_LITERALS]; +static u32 *literal_insn[MAX_HOST_LITERALS]; +static int literal_pindex, literal_iindex; + +static inline int emith_pool_literal(u32 imm, int *offs) +{ + int idx = literal_pindex - 8; // max look behind in pool + // see if one of the last literals was the same (or close enough) + for (idx = (idx < 0 ? 
0 : idx); idx < literal_pindex; idx++) + if (abs((int)(imm - literal_pool[idx])) <= 0xff) + break; + if (idx == literal_pindex) // store new literal + literal_pool[literal_pindex++] = imm; + *offs = imm - literal_pool[idx]; + return idx; +} + +// XXX: RSB, *S will break if 1 insn is not enough static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm) { int ror2; u32 v; + int i; - switch (op) { - case A_OP_MOV: - rn = 0; - if (~imm < 0x10000) { - imm = ~imm; - op = A_OP_MVN; - } - break; + if (cond == A_COND_NV) + return; - case A_OP_EOR: - case A_OP_SUB: - case A_OP_ADD: - case A_OP_ORR: - case A_OP_BIC: - if (s == 0 && imm == 0) - return; - break; - } + do { + u32 u; + // try to get the topmost byte empty to possibly save an insn + for (v = imm, ror2 = 0; (v >> 24) && ror2 < 32/2; ror2++) + v = (v << 2) | (v >> 30); - for (v = imm, ror2 = 0; ; ror2 -= 8/2) { - /* shift down to get 'best' rot2 */ - for (; v && !(v & 3); v >>= 2) - ror2--; - - EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0x0f, v & 0xff); - - v >>= 8; - if (v == 0) + switch (op) { + case A_OP_MOV: + case A_OP_MVN: + rn = 0; + // use MVN if more bits 1 than 0 + if (count_bits(imm) > 16) { + imm = ~imm; + op = A_OP_MVN; + ror2 = -1; + break; + } + // count insns needed for mov/orr #imm +#ifdef HAVE_ARMV7 + for (i = 2, u = v; i > 0 && u; i--, u >>= 8) + while (u > 0xff && !(u & 3)) + u >>= 2; + if (u) { // 3+ insns needed... + if (op == A_OP_MVN) + imm = ~imm; + // ...prefer movw/movt + EOP_MOVW(cond,rd, imm); + if (imm & 0xffff0000) + EOP_MOVT(cond,rd, imm); + return; + } +#else + for (i = 2, u = v; i > 0 && u; i--, u >>= 8) + while (u > 0xff && !(u & 3)) + u >>= 2; + if (u) { // 3+ insns needed... 
+ if (op == A_OP_MVN) + imm = ~imm; + // ...emit literal load + int idx, o; + if (literal_iindex >= MAX_HOST_LITERALS) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool overflow"); + exit(1); + } + idx = emith_pool_literal(imm, &o); + literal_insn[literal_iindex++] = (u32 *)tcache_ptr; + EOP_LDR_IMM2(cond, rd, PC, idx * sizeof(u32)); + if (o > 0) + EOP_C_DOP_IMM(cond, A_OP_ADD, 0,rd,rd,0,o); + else if (o < 0) + EOP_C_DOP_IMM(cond, A_OP_SUB, 0,rd,rd,0,-o); + return; + } +#endif break; - if (op == A_OP_MOV) - op = A_OP_ORR; - if (op == A_OP_MVN) - op = A_OP_BIC; + + case A_OP_AND: + // AND must fit into 1 insn. if not, use BIC + for (u = v; u > 0xff && !(u & 3); u >>= 2) ; + if (u >> 8) { + imm = ~imm; + op = A_OP_BIC; + ror2 = -1; + } + break; + + case A_OP_SUB: + case A_OP_ADD: + // swap ADD and SUB if more bits 1 than 0 + if (s == 0 && count_bits(imm) > 16) { + imm = -imm; + op ^= (A_OP_ADD^A_OP_SUB); + ror2 = -1; + } + case A_OP_EOR: + case A_OP_ORR: + case A_OP_BIC: + if (s == 0 && imm == 0 && rd == rn) + return; + break; + } + } while (ror2 < 0); + + do { + // shift down to get 'best' rot2 + while (v > 0xff && !(v & 3)) + v >>= 2, ror2--; + EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0xf, v & 0xff); + + switch (op) { + case A_OP_MOV: op = A_OP_ORR; break; + case A_OP_MVN: op = A_OP_BIC; break; + case A_OP_ADC: op = A_OP_ADD; break; + case A_OP_SBC: op = A_OP_SUB; break; + } rn = rd; - } + + v >>= 8, ror2 -= 8/2; + if (v && s) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "op+s %x value too big", op); + exit(1); + } + } while (v); } #define emith_op_imm(cond, s, op, r, imm) \ @@ -307,6 +586,9 @@ static int emith_xbranch(int cond, void *target, int is_call) int direct = is_offset_24(val); u32 *start_ptr = (u32 *)tcache_ptr; + if (cond == A_COND_NV) + return 0; // never taken + if (direct) { EOP_C_B(cond,is_call,val & 0xffffff); // b, bl target @@ -316,13 +598,13 @@ static int emith_xbranch(int cond, void *target, int is_call) #ifdef __EPOC32__ // elprintf(EL_SVP, 
"emitting indirect jmp %08x->%08x", tcache_ptr, target); if (is_call) - EOP_ADD_IMM(14,15,0,8); // add lr,pc,#8 - EOP_C_AM2_IMM(cond,1,0,1,15,15,0); // ldrcc pc,[pc] - EOP_MOV_REG_SIMPLE(15,15); // mov pc, pc - EMIT((u32)target); + EOP_ADD_IMM(LR,PC,0,8); // add lr,pc,#8 + EOP_C_AM2_IMM(cond,1,0,1,PC,PC,0); // ldrcc pc,[pc] + EOP_MOV_REG_SIMPLE(PC,PC); // mov pc, pc + EMIT((u32)target,M1(PC),0); #else // should never happen - elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %08x->%08x", target, tcache_ptr); + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, "indirect jmp %8p->%8p", target, tcache_ptr); exit(1); #endif } @@ -330,12 +612,71 @@ static int emith_xbranch(int cond, void *target, int is_call) return (u32 *)tcache_ptr - start_ptr; } -#define JMP_POS(ptr) \ +static void emith_pool_commit(int jumpover) +{ + int i, sz = literal_pindex * sizeof(u32); + u8 *pool = (u8 *)tcache_ptr; + + // nothing to commit if pool is empty + if (sz == 0) + return; + // need branch over pool if not at block end + if (jumpover < 0 && sz == sizeof(u32)) { + // hack for SVP drc (patch logic detects distance 4) + sz += sizeof(u32); + } else if (jumpover) { + pool += sizeof(u32); + emith_xbranch(A_COND_AL, (u8 *)pool + sz, 0); + } + emith_flush(); + // safety check - pool must be after insns and reachable + if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0xfff) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool offset out of range"); + exit(1); + } + // copy pool and adjust addresses in insns accessing the pool + memcpy(pool, literal_pool, sz); + for (i = 0; i < literal_iindex; i++) { + *literal_insn[i] += (u8 *)pool - ((u8 *)literal_insn[i] + 8); + } + // count pool constants as insns for statistics + for (i = 0; i < literal_pindex; i++) + COUNT_OP; + + tcache_ptr = (void *)((u8 *)pool + sz); + literal_pindex = literal_iindex = 0; +} + +static inline void emith_pool_check(void) +{ + // check if pool must be committed + if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex && + (u8 
*)tcache_ptr - (u8 *)literal_insn[0] > 0xe00)) + // pool full, or displacement is approaching the limit + emith_pool_commit(1); +} + +static inline void emith_pool_adjust(int tcache_offs, int move_offs) +{ + u32 *ptr = (u32 *)tcache_ptr - tcache_offs; + int i; + + for (i = literal_iindex-1; i >= 0 && literal_insn[i] >= ptr; i--) + if (literal_insn[i] == ptr) + literal_insn[i] += move_offs; +} + +#define EMITH_HINT_COND(cond) /**/ + +#define JMP_POS(ptr) { \ ptr = tcache_ptr; \ - tcache_ptr += sizeof(u32) + EMIT(0,M1(PC),0); \ +} #define JMP_EMIT(cond, ptr) { \ u32 val_ = (u32 *)tcache_ptr - (u32 *)(ptr) - 2; \ + emith_flush(); /* NO insn swapping across jump targets */ \ EOP_C_B_PTR(ptr, cond, 0, val_ & 0xffffff); \ } @@ -353,52 +694,130 @@ static int emith_xbranch(int cond, void *target, int is_call) #define EMITH_SJMP_START(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP_END(cond) EMITH_NOTHING1(cond) +#define EMITH_SJMP2_START(cond) EMITH_NOTHING1(cond) +#define EMITH_SJMP2_MID(cond) EMITH_JMP_START((cond)^1) // inverse cond +#define EMITH_SJMP2_END(cond) EMITH_JMP_END((cond)^1) #define EMITH_SJMP3_START(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP3_MID(cond) EMITH_NOTHING1(cond) #define EMITH_SJMP3_END() +#define emith_move_r_r_c(cond, d, s) \ + EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,0) #define emith_move_r_r(d, s) \ - EOP_MOV_REG_SIMPLE(d, s) + emith_move_r_r_c(A_COND_AL, d, s) + +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_c(cond, d, s) +#define emith_move_r_r_ptr(d, s) \ + emith_move_r_r(d, s) #define emith_mvn_r_r(d, s) \ EOP_MVN_REG(A_COND_AL,0,d,s,A_AM1_LSL,0) #define emith_add_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_ADD_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) \ + emith_add_r_r_r_lsl(d, s1, s2, lslimm) + +#define emith_adc_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_ADC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) + +#define emith_addf_r_r_r_lsl(d, s1, s2, lslimm) \ + 
EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) +#define emith_addf_r_r_r_lsr(d, s1, s2, lslimm) \ + EOP_ADD_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSR,lslimm) + +#define emith_adcf_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_ADC_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) + +#define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_SUB_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) + +#define emith_sbc_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_SBC_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) + +#define emith_subf_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_SUB_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) + +#define emith_sbcf_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_SBC_REG(A_COND_AL,1,d,s1,s2,A_AM1_LSL,lslimm) #define emith_or_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) +#define emith_or_r_r_r_lsr(d, s1, s2, lsrimm) \ + EOP_ORR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm) #define emith_eor_r_r_r_lsl(d, s1, s2, lslimm) \ EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) - #define emith_eor_r_r_r_lsr(d, s1, s2, lsrimm) \ EOP_EOR_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSR,lsrimm) +#define emith_and_r_r_r_lsl(d, s1, s2, lslimm) \ + EOP_AND_REG(A_COND_AL,0,d,s1,s2,A_AM1_LSL,lslimm) + #define emith_or_r_r_lsl(d, s, lslimm) \ emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) +#define emith_eor_r_r_lsl(d, s, lslimm) \ + emith_eor_r_r_r_lsl(d, d, s, lslimm) #define emith_eor_r_r_lsr(d, s, lsrimm) \ emith_eor_r_r_r_lsr(d, d, s, lsrimm) #define emith_add_r_r_r(d, s1, s2) \ emith_add_r_r_r_lsl(d, s1, s2, 0) +#define emith_adc_r_r_r(d, s1, s2) \ + emith_adc_r_r_r_lsl(d, s1, s2, 0) + +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_adcf_r_r_r(d, s1, s2) \ + emith_adcf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sub_r_r_r(d, s1, s2) \ + emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sbc_r_r_r(d, s1, s2) \ + emith_sbc_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + 
emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sbcf_r_r_r(d, s1, s2) \ + emith_sbcf_r_r_r_lsl(d, s1, s2, 0) + #define emith_or_r_r_r(d, s1, s2) \ emith_or_r_r_r_lsl(d, s1, s2, 0) #define emith_eor_r_r_r(d, s1, s2) \ emith_eor_r_r_r_lsl(d, s1, s2, 0) +#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + #define emith_add_r_r(d, s) \ emith_add_r_r_r(d, d, s) -#define emith_sub_r_r(d, s) \ - EOP_SUB_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r(d, d, s) #define emith_adc_r_r(d, s) \ - EOP_ADC_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) + emith_adc_r_r_r(d, d, s) +#define emith_sub_r_r(d, s) \ + emith_sub_r_r_r(d, d, s) + +#define emith_sbc_r_r(d, s) \ + emith_sbc_r_r_r(d, d, s) + +#define emith_negc_r_r(d, s) \ + EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,0,s,d,0,0) + +#define emith_and_r_r_c(cond, d, s) \ + EOP_AND_REG(cond,0,d,d,s,A_AM1_LSL,0) #define emith_and_r_r(d, s) \ EOP_AND_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) @@ -411,6 +830,9 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_tst_r_r(d, s) \ EOP_TST_REG(A_COND_AL,d,s,A_AM1_LSL,0) +#define emith_tst_r_r_ptr(d, s) \ + emith_tst_r_r(d, s) + #define emith_teq_r_r(d, s) \ EOP_TEQ_REG(A_COND_AL,d,s,A_AM1_LSL,0) @@ -435,12 +857,18 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_move_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_MOV, r, imm) +#define emith_move_r_ptr_imm(r, imm) \ + emith_move_r_imm(r, (u32)(imm)) + #define emith_add_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_ADD, r, imm) #define emith_adc_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_ADC, r, imm) +#define emith_adcf_r_imm(r, imm) \ + emith_op_imm(A_COND_AL, 1, A_OP_ADC, r, imm) + #define emith_sub_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_SUB, r, imm) @@ -456,18 +884,21 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_eor_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 0, A_OP_EOR, r, imm) +#define 
emith_eor_r_imm_ptr(r, imm) \ + emith_eor_r_imm(r, imm) + // note: only use 8bit imm for these #define emith_tst_r_imm(r, imm) \ emith_top_imm(A_COND_AL, A_OP_TST, r, imm) -#define emith_cmp_r_imm(r, imm) { \ - u32 op = A_OP_CMP, imm_ = imm; \ - if (~imm_ < 0x100) { \ - imm_ = ~imm_; \ - op = A_OP_CMN; \ +#define emith_cmp_r_imm(r, imm) do { \ + u32 op_ = A_OP_CMP, imm_ = (u8)imm; \ + if ((s8)imm_ < 0) { \ + imm_ = (u8)-imm_; \ + op_ = A_OP_CMN; \ } \ - emith_top_imm(A_COND_AL, op, r, imm); \ -} + emith_top_imm(A_COND_AL, op_, r, imm_); \ +} while (0) #define emith_subf_r_imm(r, imm) \ emith_op_imm(A_COND_AL, 1, A_OP_SUB, r, imm) @@ -487,15 +918,29 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_eor_r_imm_c(cond, r, imm) \ emith_op_imm(cond, 0, A_OP_EOR, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_c(cond, r, imm) + #define emith_bic_r_imm_c(cond, r, imm) \ emith_op_imm(cond, 0, A_OP_BIC, r, imm) -#define emith_move_r_imm_s8(r, imm) { \ - if ((imm) & 0x80) \ - EOP_MVN_IMM(r, 0, ((imm) ^ 0xff)); \ +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_top_imm(cond, A_OP_TST, r, imm) + +#define emith_move_r_imm_s8_patchable(r, imm) do { \ + emith_flush(); /* pin insn at current tcache_ptr for patching */ \ + if ((s8)(imm) < 0) \ + EOP_MVN_IMM(r, 0, (u8)~(imm)); \ else \ - EOP_MOV_IMM(r, 0, imm); \ -} + EOP_MOV_IMM(r, 0, (u8)(imm)); \ +} while (0) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u32 *ptr_ = (u32 *)ptr; u32 op_ = *ptr_ & 0xfe1ff000; \ + if ((s8)(imm) < 0) \ + EMIT_PTR(ptr_, op_ | (A_OP_MVN<<21) | (u8)~(imm));\ + else \ + EMIT_PTR(ptr_, op_ | (A_OP_MOV<<21) | (u8)(imm));\ +} while (0) #define emith_and_r_r_imm(d, s, imm) \ emith_op_imm2(A_COND_AL, 0, A_OP_AND, d, s, imm) @@ -503,9 +948,24 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_add_r_r_imm(d, s, imm) \ emith_op_imm2(A_COND_AL, 0, A_OP_ADD, d, s, imm) +#define emith_add_r_r_ptr_imm(d, s, imm) \ + 
emith_add_r_r_imm(d, s, imm) + +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_op_imm2(cond, 0, A_OP_SUB, d, s, (imm)) + #define emith_sub_r_r_imm(d, s, imm) \ emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm) +#define emith_subf_r_r_imm(d, s, imm) \ + emith_op_imm2(A_COND_AL, 1, A_OP_SUB, d, s, imm) + +#define emith_or_r_r_imm(d, s, imm) \ + emith_op_imm2(A_COND_AL, 0, A_OP_ORR, d, s, imm) + +#define emith_eor_r_r_imm(d, s, imm) \ + emith_op_imm2(A_COND_AL, 0, A_OP_EOR, d, s, imm) + #define emith_neg_r_r(d, s) \ EOP_RSB_IMM(d, s, 0, 0) @@ -537,30 +997,34 @@ static int emith_xbranch(int cond, void *target, int is_call) EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ASR,cnt) // note: only C flag updated correctly -#define emith_rolf(d, s, cnt) { \ +#define emith_rolf(d, s, cnt) do { \ EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ROR,32-(cnt)); \ /* we don't have ROL so we shift to get the right carry */ \ EOP_TST_REG(A_COND_AL,d,d,A_AM1_LSR,1); \ -} +} while (0) #define emith_rorf(d, s, cnt) \ EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ROR,cnt) #define emith_rolcf(d) \ emith_adcf_r_r(d, d) +#define emith_rolc(d) \ + emith_adc_r_r(d, d) #define emith_rorcf(d) \ EOP_MOV_REG(A_COND_AL,1,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */ +#define emith_rorc(d) \ + EOP_MOV_REG(A_COND_AL,0,d,d,A_AM1_ROR,0) /* ROR #0 -> RRX */ #define emith_negcf_r_r(d, s) \ EOP_C_DOP_IMM(A_COND_AL,A_OP_RSC,1,s,d,0,0) -#define emith_mul(d, s1, s2) { \ +#define emith_mul(d, s1, s2) do { \ if ((d) != (s1)) /* rd != rm limitation */ \ EOP_MUL(d, s1, s2); \ else \ EOP_MUL(d, s2, s1); \ -} +} while (0) #define emith_mul_u64(dlo, dhi, s1, s2) \ EOP_C_UMULL(A_COND_AL,0,dhi,dlo,s1,s2) @@ -568,30 +1032,77 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_mul_s64(dlo, dhi, s1, s2) \ EOP_C_SMULL(A_COND_AL,0,dhi,dlo,s1,s2) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + EOP_C_SMLAL(cond,0,dhi,dlo,s1,s2) #define emith_mula_s64(dlo, dhi, s1, s2) \ EOP_C_SMLAL(A_COND_AL,0,dhi,dlo,s1,s2) // misc #define 
emith_read_r_r_offs_c(cond, r, rs, offs) \ EOP_LDR_IMM2(cond, r, rs, offs) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_c(cond, r, rs, offs) +#define emith_read_r_r_r_c(cond, r, rs, rm) \ + EOP_LDR_REG_LSL(cond, r, rs, rm, 0) +#define emith_read_r_r_offs(r, rs, offs) \ + emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read_r_r_offs_ptr(r, rs, offs) \ + emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read_r_r_r(r, rs, rm) \ + EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ EOP_LDRB_IMM2(cond, r, rs, offs) +#define emith_read8_r_r_r_c(cond, r, rs, rm) \ + EOP_LDRB_REG_LSL(cond, r, rs, rm, 0) +#define emith_read8_r_r_offs(r, rs, offs) \ + emith_read8_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read8_r_r_r(r, rs, rm) \ + emith_read8_r_r_r_c(A_COND_AL, r, rs, rm) #define emith_read16_r_r_offs_c(cond, r, rs, offs) \ EOP_LDRH_IMM2(cond, r, rs, offs) - -#define emith_read_r_r_offs(r, rs, offs) \ - emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) - -#define emith_read8_r_r_offs(r, rs, offs) \ - emith_read8_r_r_offs_c(A_COND_AL, r, rs, offs) - +#define emith_read16_r_r_r_c(cond, r, rs, rm) \ + EOP_LDRH_REG2(cond, r, rs, rm) #define emith_read16_r_r_offs(r, rs, offs) \ emith_read16_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read16_r_r_r(r, rs, rm) \ + emith_read16_r_r_r_c(A_COND_AL, r, rs, rm) +#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \ + EOP_LDRSB_IMM2(cond, r, rs, offs) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + EOP_LDRSB_REG2(cond, r, rs, rm) +#define emith_read8s_r_r_offs(r, rs, offs) \ + emith_read8s_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read8s_r_r_r(r, rs, rm) \ + emith_read8s_r_r_r_c(A_COND_AL, r, rs, rm) + +#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \ + EOP_LDRSH_IMM2(cond, r, rs, offs) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + EOP_LDRSH_REG2(cond, r, rs, rm) +#define emith_read16s_r_r_offs(r, rs, offs) \ + 
emith_read16s_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read16s_r_r_r(r, rs, rm) \ + emith_read16s_r_r_r_c(A_COND_AL, r, rs, rm) + +#define emith_write_r_r_offs_c(cond, r, rs, offs) \ + EOP_STR_IMM2(cond, r, rs, offs) +#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_write_r_r_offs_c(cond, r, rs, offs) +#define emith_write_r_r_offs(r, rs, offs) \ + emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_write_r_r_offs_ptr(r, rs, offs) \ + emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) + +#define emith_ctx_read_c(cond, r, offs) \ + emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs) #define emith_ctx_read(r, offs) \ - emith_read_r_r_offs(r, CONTEXT_REG, offs) + emith_ctx_read_c(A_COND_AL, r, offs) + +#define emith_ctx_read_ptr(r, offs) \ + emith_ctx_read(r, offs) #define emith_ctx_write(r, offs) \ EOP_STR_IMM(r, CONTEXT_REG, offs) @@ -599,13 +1110,13 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_ctx_do_multiple(op, r, offs, count, tmpr) do { \ int v_, r_ = r, c_ = count, b_ = CONTEXT_REG; \ for (v_ = 0; c_; c_--, r_++) \ - v_ |= 1 << r_; \ + v_ |= M1(r_); \ if ((offs) != 0) { \ EOP_ADD_IMM(tmpr,CONTEXT_REG,30/2,(offs)>>2);\ b_ = tmpr; \ } \ op(b_,v_); \ -} while(0) +} while (0) #define emith_ctx_read_multiple(r, offs, count, tmpr) \ emith_ctx_do_multiple(EOP_LDMIA, r, offs, count, tmpr) @@ -613,40 +1124,40 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_ctx_write_multiple(r, offs, count, tmpr) \ emith_ctx_do_multiple(EOP_STMIA, r, offs, count, tmpr) -#define emith_clear_msb_c(cond, d, s, count) { \ +#define emith_clear_msb_c(cond, d, s, count) do { \ u32 t; \ if ((count) <= 8) { \ - t = (count) - 8; \ + t = 8 - (count); \ t = (0xff << t) & 0xff; \ - EOP_BIC_IMM(d,s,8/2,t); \ EOP_C_DOP_IMM(cond,A_OP_BIC,0,s,d,8/2,t); \ } else if ((count) >= 24) { \ t = (count) - 24; \ t = 0xff >> t; \ - EOP_AND_IMM(d,s,0,t); \ EOP_C_DOP_IMM(cond,A_OP_AND,0,s,d,0,t); \ } else { \ 
EOP_MOV_REG(cond,0,d,s,A_AM1_LSL,count); \ EOP_MOV_REG(cond,0,d,d,A_AM1_LSR,count); \ } \ -} +} while (0) #define emith_clear_msb(d, s, count) \ emith_clear_msb_c(A_COND_AL, d, s, count) -#define emith_sext(d, s, bits) { \ +#define emith_sext(d, s, bits) do { \ EOP_MOV_REG_LSL(d,s,32 - (bits)); \ EOP_MOV_REG_ASR(d,d,32 - (bits)); \ -} +} while (0) -#define emith_do_caller_regs(mask, func) { \ +#define emith_uext_ptr(r) /**/ + +#define emith_do_caller_regs(mask, func) do { \ u32 _reg_mask = (mask) & 0x500f; \ if (_reg_mask) { \ if (__builtin_parity(_reg_mask) == 1) \ _reg_mask |= 0x10; /* eabi align */ \ func(_reg_mask); \ } \ -} +} while (0) #define emith_save_caller_regs(mask) \ emith_do_caller_regs(mask, EOP_STMFD_SP) @@ -669,20 +1180,26 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_jump_cond(cond, target) \ emith_xbranch(cond, target, 0) +#define emith_jump_cond_inrange(target) !0 #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) -#define emith_jump_patch(ptr, target) do { \ - u32 *ptr_ = ptr; \ +#define emith_jump_patch(ptr, target, pos) do { \ + u32 *ptr_ = (u32 *)ptr; \ u32 val_ = (u32 *)(target) - ptr_ - 2; \ *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \ + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ } while (0) +#define emith_jump_patch_inrange(ptr, target) !0 +#define emith_jump_patch_size() 4 -#define emith_jump_at(ptr, target) { \ - u32 val_ = (u32 *)(target) - (u32 *)(ptr) - 2; \ - EOP_C_B_PTR(ptr, A_COND_AL, 0, val_ & 0xffffff); \ -} +#define emith_jump_at(ptr, target) do { \ + u32 *ptr_ = (u32 *)ptr; \ + u32 val_ = (u32 *)(target) - ptr_ - 2; \ + EOP_C_B_PTR(ptr_, A_COND_AL, 0, val_ & 0xffffff); \ +} while (0) +#define emith_jump_at_size() 4 #define emith_jump_reg_c(cond, r) \ EOP_C_BX(cond, r) @@ -691,7 +1208,7 @@ static int emith_xbranch(int cond, void *target, int is_call) emith_jump_reg_c(A_COND_AL, r) #define emith_jump_ctx_c(cond, offs) \ - 
EOP_LDR_IMM2(cond,15,CONTEXT_REG,offs) + EOP_LDR_IMM2(cond,PC,CONTEXT_REG,offs) #define emith_jump_ctx(offs) \ emith_jump_ctx_c(A_COND_AL, offs) @@ -702,48 +1219,90 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_call(target) \ emith_call_cond(A_COND_AL, target) -#define emith_call_ctx(offs) { \ - emith_move_r_r(14, 15); \ +#define emith_call_reg(r) do { \ + emith_move_r_r(LR, PC); \ + EOP_C_BX(A_COND_AL, r); \ +} while (0) + +#define emith_abicall_ctx(offs) do { \ + emith_move_r_r(LR, PC); \ emith_jump_ctx(offs); \ -} +} while (0) + +#define emith_abijump_reg(r) \ + emith_jump_reg(r) +#define emith_abijump_reg_c(cond, r) \ + emith_jump_reg_c(cond, r) +#define emith_abicall(target) \ + emith_call(target) +#define emith_abicall_cond(cond, target) \ + emith_call_cond(cond, target) +#define emith_abicall_reg(r) \ + emith_call_reg(r) + +#define emith_call_cleanup() /**/ #define emith_ret_c(cond) \ - emith_jump_reg_c(cond, 14) + emith_jump_reg_c(cond, LR) #define emith_ret() \ emith_ret_c(A_COND_AL) #define emith_ret_to_ctx(offs) \ - emith_ctx_write(14, offs) + emith_ctx_write(LR, offs) -#define emith_push_ret() \ - EOP_STMFD_SP(A_R14M) +#define emith_add_r_ret(r) \ + emith_add_r_r_ptr(r, LR) -#define emith_pop_and_ret() \ - EOP_LDMFD_SP(A_R15M) +/* pushes r12 for eabi alignment */ +#define emith_push_ret(r) do { \ + int r_ = (r >= 0 ? r : 12); \ + EOP_STMFD_SP(M2(r_,LR)); \ +} while (0) -#define host_instructions_updated(base, end) \ - cache_flush_d_inval_i(base, end) +#define emith_pop_and_ret(r) do { \ + int r_ = (r >= 0 ? 
r : 12); \ + EOP_LDMFD_SP(M2(r_,PC)); \ +} while (0) + +#define host_instructions_updated(base, end, force) \ + do { if (force) emith_update_add(base, end); } while (0) + +#define host_call(addr, args) \ + addr #define host_arg2reg(rd, arg) \ rd = arg +#define emith_rw_offs_max() 0xff // minimum of offset in AM2 (12 bit) and AM3 (8 bit) + /* SH2 drc specific */ /* pushes r12 for eabi alignment */ #define emith_sh2_drc_entry() \ - EOP_STMFD_SP(A_R4M|A_R5M|A_R6M|A_R7M|A_R8M|A_R9M|A_R10M|A_R11M|A_R12M|A_R14M) + EOP_STMFD_SP(M10(4,5,6,7,8,9,10,11,12,LR)) #define emith_sh2_drc_exit() \ - EOP_LDMFD_SP(A_R4M|A_R5M|A_R6M|A_R7M|A_R8M|A_R9M|A_R10M|A_R11M|A_R12M|A_R15M) + EOP_LDMFD_SP(M10(4,5,6,7,8,9,10,11,12,PC)) -#define emith_sh2_wcall(a, tab) { \ - emith_lsr(12, a, SH2_WRITE_SHIFT); \ - EOP_LDR_REG_LSL(A_COND_AL,12,tab,12,2); \ - emith_move_r_r(2, CONTEXT_REG); \ - emith_jump_reg(12); \ -} +// assumes a is in arg0, tab, func and mask are temp +#define emith_sh2_rcall(a, tab, func, mask) do { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + EOP_ADD_REG_LSL(tab, tab, mask, 3); \ + if (func < mask) EOP_LDMIA(tab, M2(func,mask)); /* ldm if possible */ \ + else { emith_read_r_r_offs(func, tab, 0); \ + emith_read_r_r_offs(mask, tab, 4); } \ + emith_addf_r_r_r(func,func,func); \ +} while (0) -#define emith_sh2_dtbf_loop() { \ +// assumes a, val are in arg0 and arg1, tab and func are temp +#define emith_sh2_wcall(a, val, tab, func) do { \ + emith_lsr(func, a, SH2_WRITE_SHIFT); \ + EOP_LDR_REG_LSL(A_COND_AL,func,tab,func,2); \ + emith_move_r_r(2, CONTEXT_REG); /* arg2 */ \ + emith_abijump_reg(func); \ +} while (0) + +#define emith_sh2_dtbf_loop() do { \ int cr, rn; \ int tmp_ = rcache_get_tmp(); \ cr = rcache_get_reg(SHR_SR, RC_GR_RMW); \ @@ -762,53 +1321,195 @@ static int emith_xbranch(int cond, void *target, int is_call) EOP_ORR_IMM_C(A_COND_LS,cr,cr,0,1); /* orrls cr, #1 */ \ EOP_MOV_IMM_C(A_COND_LS,rn,0,0); /* movls rn, #0 */ \ rcache_free_tmp(tmp_); \ -} +} while (0) -#define emith_write_sr(sr,
srcr) { \ +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_cmp_r_imm(sr, 0); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +#define emith_write_sr(sr, srcr) do { \ emith_lsr(sr, sr, 10); \ emith_or_r_r_r_lsl(sr, sr, srcr, 22); \ emith_ror(sr, sr, 22); \ -} +} while (0) -#define emith_carry_to_t(srr, is_sub) { \ - if (is_sub) { /* has inverted C on ARM */ \ +#define emith_carry_to_t(srr, is_sub) do { \ + emith_bic_r_imm(srr, 1); \ + if (is_sub) /* has inverted C on ARM */ \ emith_or_r_imm_c(A_COND_CC, srr, 1); \ - emith_bic_r_imm_c(A_COND_CS, srr, 1); \ - } else { \ + else \ emith_or_r_imm_c(A_COND_CS, srr, 1); \ - emith_bic_r_imm_c(A_COND_CC, srr, 1); \ - } \ -} +} while (0) -#define emith_tpop_carry(sr, is_sub) { \ +#define emith_t_to_carry(srr, is_sub) do { \ + if (is_sub) { \ + int t_ = rcache_get_tmp(); \ + emith_eor_r_r_imm(t_, srr, 1); \ + emith_rorf(t_, t_, 1); \ + rcache_free_tmp(t_); \ + } else { \ + emith_rorf(srr, srr, 1); \ + emith_rol(srr, srr, 1); \ + } \ +} while (0) + +#define emith_tpop_carry(sr, is_sub) do { \ if 
(is_sub) \ emith_eor_r_imm(sr, 1); \ emith_lsrf(sr, sr, 1); \ -} +} while (0) -#define emith_tpush_carry(sr, is_sub) { \ +#define emith_tpush_carry(sr, is_sub) do { \ emith_adc_r_r(sr, sr); \ if (is_sub) \ emith_eor_r_imm(sr, 1); \ -} +} while (0) /* + * T = carry(Rn = (Rn << 1) | T) * if Q - * t = carry(Rn += Rm) + * T ^= !carry(Rn += Rm) * else - * t = carry(Rn -= Rm) - * T ^= t + * T ^= !carry(Rn -= Rm) */ -#define emith_sh2_div1_step(rn, rm, sr) { \ +#define emith_sh2_div1_step(rn, rm, sr) do { \ void *jmp0, *jmp1; \ + emith_tpop_carry(sr, 0); /* Rn = 2*Rn+T */\ + emith_adcf_r_r_r(rn, rn, rn); \ + emith_tpush_carry(sr, 0); \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ JMP_POS(jmp0); /* beq do_sub */ \ - emith_addf_r_r(rn, rm); \ - emith_eor_r_imm_c(A_COND_CS, sr, T); \ + emith_addf_r_r(rn, rm); /* Rn += Rm */ \ + emith_eor_r_imm_c(A_COND_CC, sr, T); \ JMP_POS(jmp1); /* b done */ \ JMP_EMIT(A_COND_EQ, jmp0); /* do_sub: */ \ - emith_subf_r_r(rn, rm); \ - emith_eor_r_imm_c(A_COND_CC, sr, T); \ + emith_subf_r_r(rn, rm); /* Rn -= Rm */ \ + emith_eor_r_imm_c(A_COND_CS, sr, T); \ JMP_EMIT(A_COND_AL, jmp1); /* done: */ \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP2_START(DCOND_NE); \ + emith_mula_s64_c(DCOND_EQ, ml, mh, rn, rm); \ + EMITH_SJMP2_MID(DCOND_NE); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + emith_mula_s64(ml, mh, rn, rm); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH >> 31 to MACH >> 15. 
this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_addf_r_r_r_lsr(rn, rn, mh, 31); \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP2_END(DCOND_NE); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP2_START(DCOND_NE); \ + emith_mula_s64_c(DCOND_EQ, ml, mh, rn, rm); \ + EMITH_SJMP2_MID(DCOND_NE); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + emith_mula_s64(ml, mh, rn, rm); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_addf_r_r_r_lsr(mh, mh, ml, 31); /* sum = MACH + ((MACL>>31)&1) */\ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP2_END(DCOND_NE); \ +} while (0) + +#ifdef T +// T bit handling +static int tcond = -1; + +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +#define emith_clr_t_cond(sr) \ + (void)sr + +#define emith_set_t_cond(sr, cond) \ + tcond = cond + +#define emith_get_t_cond() \ + tcond + +#define emith_invalidate_t() \ + tcond = -1 + +#define emith_set_t(sr, val) \ + tcond = ((val) ? 
A_COND_AL: A_COND_NV) + +static void emith_sync_t(int sr) +{ + if (tcond == A_COND_AL) + emith_or_r_imm(sr, T); + else if (tcond == A_COND_NV) + emith_bic_r_imm(sr, T); + else if (tcond >= 0) { + emith_bic_r_imm(sr, T); + emith_or_r_imm_c(tcond, sr, T); + } + tcond = -1; } +static int emith_tst_t(int sr, int tf) +{ + if (tcond < 0) { + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; + } else if (tcond >= A_COND_AL) { + // MUST sync because A_COND_NV isn't a real condition + emith_sync_t(sr); + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; + } else + return tf ? tcond : emith_invert_cond(tcond); +} +#endif diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c new file mode 100644 index 00000000..39ae82fe --- /dev/null +++ b/cpu/drc/emit_arm64.c @@ -0,0 +1,1432 @@ +/* + * Basic macros to emit ARM A64 instructions and some utils + * Copyright (C) 2019-2024 irixxxx + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ +#define HOST_REGS 32 + +// AAPCS64: params: r0-r7, return: r0-r1, temp: r8-r17, saved: r19-r28 +// reserved: r18 (for platform use), r29 (frame pointer) +#define RET_REG 0 +#define PARAM_REGS { 0, 1, 2, 3, 4, 5, 6, 7 } +#define PRESERVED_REGS { 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 } +#define TEMPORARY_REGS { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 } + +#define CONTEXT_REG 19 +#define STATIC_SH2_REGS { SHR_SR,28 , SHR_R(0),27 , SHR_R(1),26 } + +// R31 doesn't exist, it aliases either with zero or SP +#define SP 31 // stack pointer +#define Z0 31 // zero register +#define LR 30 // link register +#define FP 29 // frame pointer +#define PR 18 // platform register + +// All operations but ptr ops are using the lower 32 bits of the A64 registers. +// The upper 32 bits are only used in ptr ops and are zeroed by A64 32 bit ops. 
+ + +#define A64_COND_EQ 0x0 +#define A64_COND_NE 0x1 +#define A64_COND_HS 0x2 +#define A64_COND_LO 0x3 +#define A64_COND_MI 0x4 +#define A64_COND_PL 0x5 +#define A64_COND_VS 0x6 +#define A64_COND_VC 0x7 +#define A64_COND_HI 0x8 +#define A64_COND_LS 0x9 +#define A64_COND_GE 0xa +#define A64_COND_LT 0xb +#define A64_COND_GT 0xc +#define A64_COND_LE 0xd +#define A64_COND_CS A64_COND_HS +#define A64_COND_CC A64_COND_LO +// "fake" conditions for T bit handling +#define A64_COND_AL 0xe +#define A64_COND_NV 0xf + +// DRC conditions +#define DCOND_EQ A64_COND_EQ +#define DCOND_NE A64_COND_NE +#define DCOND_MI A64_COND_MI +#define DCOND_PL A64_COND_PL +#define DCOND_HI A64_COND_HI +#define DCOND_HS A64_COND_HS +#define DCOND_LO A64_COND_LO +#define DCOND_GE A64_COND_GE +#define DCOND_GT A64_COND_GT +#define DCOND_LT A64_COND_LT +#define DCOND_LS A64_COND_LS +#define DCOND_LE A64_COND_LE +#define DCOND_VS A64_COND_VS +#define DCOND_VC A64_COND_VC + +#define DCOND_CS A64_COND_HS +#define DCOND_CC A64_COND_LO + + +// unified insn +#define A64_INSN(op, b29, b22, b21, b16, b12, b10, b5, b0) \ + (((op)<<25)|((b29)<<29)|((b22)<<22)|((b21)<<21)|((b16)<<16)|((b12)<<12)|((b10)<<10)|((b5)<<5)|((b0)<<0)) + +#define _ 0 // marker for "field unused" + +#define A64_NOP \ + A64_INSN(0xa,0x6,0x4,_,0x3,0x2,_,0,0x1f) // 0xd503201f + +// arithmetic/logical + +enum { OP_AND, OP_OR, OP_EOR, OP_ANDS, OP_ADD, OP_ADDS, OP_SUB, OP_SUBS }; +enum { ST_LSL, ST_LSR, ST_ASR, ST_ROR }; +enum { XT_UXTW=0x4, XT_UXTX=0x6, XT_LSL=0x7, XT_SXTW=0xc, XT_SXTX=0xe }; +#define OP_SZ64 (1 << 31) // bit for 64 bit op selection +#define OP_N64 (1 << 22) // N-bit for 64 bit logical immediate ops + +#define A64_OP_REG(op, n, rd, rn, rm, stype, simm) /* arith+logical, ST_ */ \ + A64_INSN(0x5,(op)&3,((op)&4)|stype,n,rm,_,simm,rn,rd) +#define A64_OP_XREG(op, rd, rn, rm, xtopt, simm) /* arith, XT_ */ \ + A64_INSN(0x5,(op)&3,0x4,1,rm,xtopt,simm,rn,rd) +#define A64_OP_IMM12(op, rd, rn, imm, lsl12) /* arith */ \ + 
A64_INSN(0x8,(op)&3,((op)&4)|lsl12,_,_,_,(imm)&0xfff,rn,rd) +#define A64_OP_IMMBM(op, rd, rn, immr, imms) /* logical */ \ + A64_INSN(0x9,(op)&3,0x0,_,immr,_,(imms)&0x3f,rn,rd) + +// rd = rn OP (rm SHIFT simm) +#define A64_ADD_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_ADD,0,rd,rn,rm,stype,simm) +#define A64_ADDS_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_ADDS,0,rd,rn,rm,stype,simm) +#define A64_SUB_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_SUB,0,rd,rn,rm,stype,simm) +#define A64_SUBS_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_SUBS,0,rd,rn,rm,stype,simm) + +#define A64_NEG_REG(rd, rm, stype, simm) \ + A64_SUB_REG(rd,Z0,rm,stype,simm) +#define A64_NEGS_REG(rd, rm, stype, simm) \ + A64_SUBS_REG(rd,Z0,rm,stype,simm) +#define A64_NEGC_REG(rd, rm) \ + A64_SBC_REG(rd,Z0,rm) +#define A64_NEGCS_REG(rd, rm) \ + A64_SBCS_REG(rd,Z0,rm) +#define A64_CMP_REG(rn, rm, stype, simm) \ + A64_SUBS_REG(Z0, rn, rm, stype, simm) +#define A64_CMN_REG(rn, rm, stype, simm) \ + A64_ADDS_REG(Z0, rn, rm, stype, simm) + +#define A64_EOR_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_EOR,0,rd,rn,rm,stype,simm) +#define A64_OR_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_OR,0,rd,rn,rm,stype,simm) +#define A64_ORN_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_OR,1,rd,rn,rm,stype,simm) +#define A64_AND_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_AND,0,rd,rn,rm,stype,simm) +#define A64_ANDS_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_ANDS,0,rd,rn,rm,stype,simm) +#define A64_BIC_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_AND,1,rd,rn,rm,stype,simm) +#define A64_BICS_REG(rd, rn, rm, stype, simm) \ + A64_OP_REG(OP_ANDS,1,rd,rn,rm,stype,simm) + +#define A64_TST_REG(rn, rm, stype, simm) \ + A64_ANDS_REG(Z0, rn, rm, stype, simm) +#define A64_MOV_REG(rd, rm, stype, simm) \ + A64_OR_REG(rd, Z0, rm, stype, simm) +#define A64_MVN_REG(rd, rm, stype, simm) \ + A64_ORN_REG(rd, Z0, rm, stype, simm) + +// rd = rn OP (rm EXTEND simm) +#define A64_ADD_XREG(rd, rn, rm, xtopt, simm) \ + 
A64_OP_XREG(OP_ADD,rd,rn,rm,xtopt,simm) +#define A64_ADDS_XREG(rd, rn, rm, xtopt, simm) \ + A64_OP_XREG(OP_ADDS,rd,rn,rm,xtopt,simm) +#define A64_SUB_XREG(rd, rn, rm, xtopt, simm) \ + A64_OP_XREG(OP_SUB,rd,rn,rm,xtopt,simm) +#define A64_SUBS_XREG(rd, rn, rm, xtopt, simm) \ + A64_OP_XREG(OP_SUBS,rd,rn,rm,xtopt,simm) + +// rd = rn OP rm OP carry +#define A64_ADC_REG(rd, rn, rm) \ + A64_INSN(0xd,OP_ADD &3,0x0,_,rm,_,_,rn,rd) +#define A64_ADCS_REG(rd, rn, rm) \ + A64_INSN(0xd,OP_ADDS&3,0x0,_,rm,_,_,rn,rd) +#define A64_SBC_REG(rd, rn, rm) \ + A64_INSN(0xd,OP_SUB &3,0x0,_,rm,_,_,rn,rd) +#define A64_SBCS_REG(rd, rn, rm) \ + A64_INSN(0xd,OP_SUBS&3,0x0,_,rm,_,_,rn,rd) + +// rd = rn SHIFT rm (opcode field at bit 10: LSLV=0x8, LSRV=0x9, ASRV=0xa, RORV=0xb) +#define A64_LSL_REG(rd, rn, rm) \ + A64_INSN(0xd,0x0,0x3,_,rm,_,0x8,rn,rd) +#define A64_LSR_REG(rd, rn, rm) \ + A64_INSN(0xd,0x0,0x3,_,rm,_,0x9,rn,rd) +#define A64_ASR_REG(rd, rn, rm) \ + A64_INSN(0xd,0x0,0x3,_,rm,_,0xa,rn,rd) +#define A64_ROR_REG(rd, rn, rm) \ + A64_INSN(0xd,0x0,0x3,_,rm,_,0xb,rn,rd) + +// rd = REVERSE(rn) +#define A64_RBIT_REG(rd, rn) \ + A64_INSN(0xd,0x2,0x3,_,_,_,_,rn,rd) + +// rd = rn OP (imm12 << (0|12)) +#define A64_ADD_IMM(rd, rn, imm12, lsl12) \ + A64_OP_IMM12(OP_ADD, rd, rn, imm12, lsl12) +#define A64_ADDS_IMM(rd, rn, imm12, lsl12) \ + A64_OP_IMM12(OP_ADDS, rd, rn, imm12, lsl12) +#define A64_SUB_IMM(rd, rn, imm12, lsl12) \ + A64_OP_IMM12(OP_SUB, rd, rn, imm12, lsl12) +#define A64_SUBS_IMM(rd, rn, imm12, lsl12) \ + A64_OP_IMM12(OP_SUBS, rd, rn, imm12, lsl12) + +#define A64_CMP_IMM(rn, imm12, lsl12) \ + A64_SUBS_IMM(Z0,rn,imm12,lsl12) +#define A64_CMN_IMM(rn, imm12, lsl12) \ + A64_ADDS_IMM(Z0,rn,imm12,lsl12) + +// rd = rn OP immbm; immbm is a repeated special pattern of 2^n bits length +#define A64_EOR_IMM(rd, rn, immr, imms) \ + A64_OP_IMMBM(OP_EOR,rd,rn,immr,imms) +#define A64_OR_IMM(rd, rn, immr, imms) \ + A64_OP_IMMBM(OP_OR,rd,rn,immr,imms) +#define A64_AND_IMM(rd, rn, immr, imms) \ + A64_OP_IMMBM(OP_AND,rd,rn,immr,imms) +#define A64_ANDS_IMM(rd, rn, immr,
imms) \ + A64_OP_IMMBM(OP_ANDS,rd,rn,immr,imms) +#define A64_TST_IMM(rn, immr, imms) \ + A64_OP_IMMBM(OP_ANDS,Z0,rn,immr,imms) +#define A64_MOV_IMM(rd, rn, immr, imms) \ + A64_OP_IMMBM(OP_OR,rd,Z0,immr,imms) + +// rd = (imm16 << (0|16|32|48)) +#define A64_MOVN_IMM(rd, imm16, lsl16) \ + A64_INSN(0x9,0x0,0x2,lsl16,_,_,_,(imm16)&0xffff,rd) +#define A64_MOVZ_IMM(rd, imm16, lsl16) \ + A64_INSN(0x9,0x2,0x2,lsl16,_,_,_,(imm16)&0xffff,rd) +#define A64_MOVK_IMM(rd, imm16, lsl16) \ + A64_INSN(0x9,0x3,0x2,lsl16,_,_,_,(imm16)&0xffff,rd) +#define A64_MOVT_IMM(rd, imm16, lsl16) \ + A64_INSN(0x9,0x3,0x2,lsl16,_,_,_,(imm16)&0xffff,rd) + +// rd = rn SHIFT imm5/imm6 (for Wn/Xn) +#define A64_LSL_IMM(rd, rn, bits) /* UBFM */ \ + A64_INSN(0x9,0x2,0x4,_,32-(bits),_,31-(bits),rn,rd) +#define A64_LSR_IMM(rd, rn, bits) /* UBFM */ \ + A64_INSN(0x9,0x2,0x4,_,bits,_,31,rn,rd) +#define A64_ASR_IMM(rd, rn, bits) /* SBFM */ \ + A64_INSN(0x9,0x0,0x4,_,bits,_,31,rn,rd) +#define A64_ROR_IMM(rd, rn, bits) /* EXTR */ \ + A64_INSN(0x9,0x0,0x6,_,rn,_,bits,rn,rd) + +#define A64_SXT_IMM(rd, rn, bits) /* SBFM */ \ + A64_INSN(0x9,0x0,0x4,_,0,_,bits-1,rn,rd) +#define A64_UXT_IMM(rd, rn, bits) /* UBFM */ \ + A64_INSN(0x9,0x2,0x4,_,0,_,bits-1,rn,rd) + +#define A64_BFX_IMM(rd, rn, lsb, bits) /* UBFM */ \ + A64_INSN(0x9,0x2,0x4,_,lsb,_,bits-1,rn,rd) +#define A64_BFI_IMM(rd, rn, lsb, bits) /* BFM */ \ + A64_INSN(0x9,0x1,0x4,_,-(lsb)&0x1f,_,bits-1,rn,rd) + +// multiplication + +#define A64_SMULL(rd, rn, rm) /* Xd = Wn*Wm (+ Xa) */ \ + A64_INSN(0xd,0x4,0x4,1,rm,_,Z0,rn,rd) +#define A64_SMADDL(rd, rn, rm, ra) \ + A64_INSN(0xd,0x4,0x4,1,rm,_,ra,rn,rd) +#define A64_UMULL(rd, rn, rm) \ + A64_INSN(0xd,0x4,0x6,1,rm,_,Z0,rn,rd) +#define A64_UMADDL(rd, rn, rm, ra) \ + A64_INSN(0xd,0x4,0x6,1,rm,_,ra,rn,rd) +#define A64_MUL(rd, rn, rm) /* Wd = Wn*Wm (+ Wa) */ \ + A64_INSN(0xd,0x0,0x4,0,rm,_,Z0,rn,rd) +#define A64_MADD(rd, rn, rm, ra) \ + A64_INSN(0xd,0x0,0x4,0,rm,_,ra,rn,rd) + +// branching + +#define A64_B(offs26) \ + 
A64_INSN(0xa,0x0,_,_,_,_,_,_,(offs26) >> 2) +#define A64_BL(offs26) \ + A64_INSN(0xa,0x4,_,_,_,_,_,_,(offs26) >> 2) +#define A64_BR(rn) \ + A64_INSN(0xb,0x6,_,_,0x1f,_,_,rn,_) +#define A64_BLR(rn) \ + A64_INSN(0xb,0x6,_,_,0x3f,_,_,rn,_) +#define A64_RET(rn) /* same as BR, but hint for cpu */ \ + A64_INSN(0xb,0x6,_,_,0x5f,_,_,rn,_) +#define A64_BCOND(cond, offs19) \ + A64_INSN(0xa,0x2,_,_,_,_,_,(offs19) >> 2,(cond)) + +// conditional select + +#define A64_CINC(cond, rn, rm) \ + A64_INSN(0xd,0x0,0x2,0,rm,(cond)^1,0x1,rm,rn) /* CSINC */ +#define A64_CSET(cond, rn) \ + A64_CINC(cond, rn, Z0) + +// load pc-relative + +#define A64_LDRLIT_IMM(rd, offs19) \ + A64_INSN(0xc,0x0,0x0,_,_,_,_,(offs19) >> 2,rd) +#define A64_LDRXLIT_IMM(rd, offs19) \ + A64_INSN(0xc,0x2,0x0,_,_,_,_,(offs19) >> 2,rd) +#define A64_ADRXLIT_IMM(rd, offs21) \ + A64_INSN(0x8,(offs21)&3,0x0,_,_,_,_,(offs21) >> 2,rd) + +// load/store indexed base. Only the signed unscaled variant is used here. + +enum { LT_ST, LT_LD, LT_LDSX, LT_LDS }; +enum { AM_B=0x1, AM_H=0x3, AM_W=0x5, AM_X=0x7 }; +enum { AM_IDX, AM_IDXPOST, AM_IDXREG, AM_IDXPRE }; +#define A64_LDST_AM(ir,rm,optimm) (((ir)<<9)|((rm)<<4)|((optimm)&0x1ff)) +#define A64_OP_LDST(sz, op, am, mode, rm, rd) \ + A64_INSN(0xc,sz,op,_,_,am,mode,rm,rd) + +#define A64_LDSTX_IMM(rd, rn, offs9, ld, mode) \ + A64_OP_LDST(AM_X,ld,A64_LDST_AM(0,_,offs9),mode,rn,rd) +#define A64_LDST_IMM(rd, rn, offs9, ld, mode) \ + A64_OP_LDST(AM_W,ld,A64_LDST_AM(0,_,offs9),mode,rn,rd) +#define A64_LDSTH_IMM(rd, rn, offs9, ld, mode) \ + A64_OP_LDST(AM_H,ld,A64_LDST_AM(0,_,offs9),mode,rn,rd) +#define A64_LDSTB_IMM(rd, rn, offs9, ld, mode) \ + A64_OP_LDST(AM_B,ld,A64_LDST_AM(0,_,offs9),mode,rn,rd) + +// NB: pre/postindex isn't available with register offset +#define A64_LDSTX_REG(rd, rn, rm, ld, opt) \ + A64_OP_LDST(AM_X,ld,A64_LDST_AM(1,rm,opt),AM_IDXREG,rn,rd) +#define A64_LDST_REG(rd, rn, rm, ld, opt) \ + A64_OP_LDST(AM_W,ld,A64_LDST_AM(1,rm,opt),AM_IDXREG,rn,rd) +#define 
A64_LDSTH_REG(rd, rn, rm, ld, opt) \ + A64_OP_LDST(AM_H,ld,A64_LDST_AM(1,rm,opt),AM_IDXREG,rn,rd) +#define A64_LDSTB_REG(rd, rn, rm, ld, opt) \ + A64_OP_LDST(AM_B,ld,A64_LDST_AM(1,rm,opt),AM_IDXREG,rn,rd) + +#define A64_LDSTPX_IMM(rn, r1, r2, offs7, ld, mode) \ + A64_INSN(0x4,0x5,(mode<<1)|ld,_,_,(offs7)&0x3f8,r2,rn,r1) + +// 64 bit stuff for pointer handling + +#define A64_ADDX_XREG(rd, rn, rm, xtopt, simm) \ + OP_SZ64|A64_OP_XREG(OP_ADD,rd,rn,rm,xtopt,simm) +#define A64_ADDX_REG(rd, rn, rm, stype, simm) \ + OP_SZ64|A64_ADD_REG(rd, rn, rm, stype, simm) +#define A64_ADDXS_REG(rd, rn, rm, stype, simm) \ + OP_SZ64|A64_ADDS_REG(rd, rn, rm, stype, simm) +#define A64_ORX_REG(rd, rn, rm, stype, simm) \ + OP_SZ64|A64_OR_REG(rd, rn, rm, stype, simm) +#define A64_TSTX_REG(rn, rm, stype, simm) \ + OP_SZ64|A64_TST_REG(rn, rm, stype, simm) +#define A64_MOVX_REG(rd, rm, stype, simm) \ + OP_SZ64|A64_MOV_REG(rd, rm, stype, simm) +#define A64_ADDX_IMM(rd, rn, imm12) \ + OP_SZ64|A64_ADD_IMM(rd, rn, imm12, 0) +#define A64_EORX_IMM(rd, rn, immr, imms) \ + OP_SZ64|OP_N64|A64_EOR_IMM(rd, rn, immr, imms) +#define A64_UXTX_IMM(rd, rn, bits) \ + OP_SZ64|OP_N64|A64_UXT_IMM(rd, rn, bits) +#define A64_LSRX_IMM(rd, rn, bits) \ + OP_SZ64|OP_N64|A64_LSR_IMM(rd, rn, bits)|(63<<10) + + +// XXX: tcache_ptr type for SVP and SH2 compilers differs.. 
+#define EMIT_PTR(ptr, x) \ + do { \ + *(u32 *)(ptr) = x; \ + ptr = (void *)((u8 *)(ptr) + sizeof(u32)); \ + } while (0) + +#define EMIT(op) \ + do { \ + EMIT_PTR(tcache_ptr, op); \ + COUNT_OP; \ + } while (0) + + +// if-then-else conditional execution helpers +#define JMP_POS(ptr) { \ + ptr = tcache_ptr; \ + EMIT(A64_B(0)); \ +} + +#define JMP_EMIT(cond, ptr) { \ + u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr); \ + EMIT_PTR(ptr, A64_BCOND(cond, val_ & 0x001fffff)); \ +} + +#define JMP_EMIT_NC(ptr) { \ + u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr); \ + EMIT_PTR(ptr, A64_B(val_ & 0x0fffffff)); \ +} + +#define EMITH_JMP_START(cond) { \ + u8 *cond_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP_END(cond) \ + JMP_EMIT(cond, cond_ptr); \ +} + +#define EMITH_JMP3_START(cond) { \ + u8 *cond_ptr, *else_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP3_MID(cond) \ + JMP_POS(else_ptr); \ + JMP_EMIT(cond, cond_ptr); + +#define EMITH_JMP3_END() \ + JMP_EMIT_NC(else_ptr); \ +} + +#define EMITH_HINT_COND(cond) /**/ + +// "simple" jump (no more than a few insns) +// ARM32 will use conditional instructions here +#define EMITH_SJMP_START EMITH_JMP_START +#define EMITH_SJMP_END EMITH_JMP_END + +#define EMITH_SJMP3_START EMITH_JMP3_START +#define EMITH_SJMP3_MID EMITH_JMP3_MID +#define EMITH_SJMP3_END EMITH_JMP3_END + +#define EMITH_SJMP2_START(cond) \ + EMITH_SJMP3_START(cond) +#define EMITH_SJMP2_MID(cond) \ + EMITH_SJMP3_MID(cond) +#define EMITH_SJMP2_END(cond) \ + EMITH_SJMP3_END() + + +// data processing, register +#define emith_move_r_r_ptr(d, s) \ + EMIT(A64_MOVX_REG(d, s, ST_LSL, 0)) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) + +#define emith_move_r_r(d, s) \ + EMIT(A64_MOV_REG(d, s, ST_LSL, 0)) +#define emith_move_r_r_c(cond, d, s) \ + emith_move_r_r(d, s) + +#define emith_mvn_r_r(d, s) \ + EMIT(A64_MVN_REG(d, s, ST_LSL, 0)) + +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm < 4) EMIT(A64_ADDX_XREG(d, s1, s2, XT_SXTW, simm)); \ + else 
EMIT(A64_ADDX_REG(d, s1, s2, ST_LSL, simm)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_ADD_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_addf_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_ADDS_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_addf_r_r_r_lsr(d, s1, s2, simm) \ + EMIT(A64_ADDS_REG(d, s1, s2, ST_LSR, simm)) + +#define emith_adc_r_r_r_lsl(d, s1, s2, simm) do { /* shift s2 into a temp, then emith_adc_r_r_r */ \ + if (simm) { int _t = rcache_get_tmp(); \ + emith_lsl(_t, s2, simm); \ + emith_adc_r_r_r(d, s1, _t); \ + rcache_free_tmp(_t); \ + } else \ + emith_adc_r_r_r(d, s1, s2); \ +} while (0) + +#define emith_sbc_r_r_r_lsl(d, s1, s2, simm) do { /* shift s2 into a temp, then emith_sbc_r_r_r */ \ + if (simm) { int _t = rcache_get_tmp(); \ + emith_lsl(_t, s2, simm); \ + emith_sbc_r_r_r(d, s1, _t); \ + rcache_free_tmp(_t); \ + } else \ + emith_sbc_r_r_r(d, s1, s2); \ +} while (0) + +#define emith_sub_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_SUB_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_subf_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_SUBS_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_or_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_OR_REG(d, s1, s2, ST_LSL, simm)) +#define emith_or_r_r_r_lsr(d, s1, s2, simm) \ + EMIT(A64_OR_REG(d, s1, s2, ST_LSR, simm)) + +#define emith_eor_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_EOR_REG(d, s1, s2, ST_LSL, simm)) +#define emith_eor_r_r_r_lsr(d, s1, s2, simm) \ + EMIT(A64_EOR_REG(d, s1, s2, ST_LSR, simm)) + +#define emith_and_r_r_r_lsl(d, s1, s2, simm) \ + EMIT(A64_AND_REG(d, s1, s2, ST_LSL, simm)) + +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_eor_r_r_lsl(d, s, lslimm) \ + emith_eor_r_r_r_lsl(d, d, s, lslimm) +#define emith_eor_r_r_lsr(d, s, lsrimm) \ + emith_eor_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_add_r_r_r(d, s1, s2) \ + emith_add_r_r_r_lsl(d, s1, s2, 0) + +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sub_r_r_r(d, s1, s2) \ + 
emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_or_r_r_r(d, s1, s2) \ + emith_or_r_r_r_lsl(d, s1, s2, 0) + +#define emith_eor_r_r_r(d, s1, s2) \ + emith_eor_r_r_r_lsl(d, s1, s2, 0) + +#define emith_add_r_r_r_ptr(d, s1, s2) \ + emith_add_r_r_r_lsl_ptr(d, s1, s2, 0) +#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r_lsl_ptr(d, d, s, 0) +#define emith_add_r_r(d, s) \ + emith_add_r_r_r(d, d, s) + +#define emith_sub_r_r(d, s) \ + emith_sub_r_r_r(d, d, s) + +#define emith_neg_r_r(d, s) \ + EMIT(A64_NEG_REG(d, s, ST_LSL, 0)) + +#define emith_negc_r_r(d, s) \ + EMIT(A64_NEGC_REG(d, s)) + +#define emith_adc_r_r_r(d, s1, s2) \ + EMIT(A64_ADC_REG(d, s1, s2)) + +#define emith_adc_r_r(d, s) \ + EMIT(A64_ADC_REG(d, d, s)) + +#define emith_adcf_r_r_r(d, s1, s2) \ + EMIT(A64_ADCS_REG(d, s1, s2)) + +#define emith_sbc_r_r_r(d, s1, s2) \ + EMIT(A64_SBC_REG(d, s1, s2)) + +#define emith_sbcf_r_r_r(d, s1, s2) \ + EMIT(A64_SBCS_REG(d, s1, s2)) + +#define emith_and_r_r(d, s) \ + emith_and_r_r_r(d, d, s) +#define emith_and_r_r_c(cond, d, s) \ + emith_and_r_r(d, s) + +#define emith_or_r_r(d, s) \ + emith_or_r_r_r(d, d, s) + +#define emith_eor_r_r(d, s) \ + emith_eor_r_r_r(d, d, s) + +#define emith_tst_r_r_ptr(d, s) \ + EMIT(A64_TSTX_REG(d, s, ST_LSL, 0)) +#define emith_tst_r_r(d, s) \ + EMIT(A64_TST_REG(d, s, ST_LSL, 0)) + +#define emith_teq_r_r(d, s) do { \ + int _t = rcache_get_tmp(); \ + emith_eor_r_r_r(_t, d, s); \ + emith_cmp_r_imm(_t, 0); \ + rcache_free_tmp(_t); \ +} while (0) + +#define emith_cmp_r_r(d, s) \ + EMIT(A64_CMP_REG(d, s, ST_LSL, 0)) + +#define emith_addf_r_r(d, s) \ + emith_addf_r_r_r(d, d, s) + +#define emith_subf_r_r(d, s) \ + emith_subf_r_r_r(d, d, s) + +#define emith_adcf_r_r(d, s) \ + emith_adcf_r_r_r(d, d, s) + +#define emith_sbcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, d, s) + +#define emith_negcf_r_r(d, s) \ + 
emith_sbcf_r_r_r(d, Z0, s) + + +// move immediate + +static void emith_move_imm64(int r, int wx, int64_t imm) +{ + int sz64 = wx ? OP_SZ64:0; + int c, s; + + if (!imm) { + EMIT(sz64|A64_MOVZ_IMM(r, imm, 0)); + return; + } + if (imm && -imm == (u16)-imm) { + EMIT(sz64|A64_MOVN_IMM(r, ~imm, 0)); + return; + } + + for (c = s = 0; s < (wx ? 4:2) && imm; s++, imm >>= 16) + if ((u16)(imm)) { + if (c++) EMIT(sz64|A64_MOVK_IMM(r, imm, s)); + else EMIT(sz64|A64_MOVZ_IMM(r, imm, s)); + } +} + +#define emith_move_r_ptr_imm(r, imm) \ + emith_move_imm64(r, 1, (intptr_t)(imm)) + +#define emith_move_r_imm(r, imm) \ + emith_move_imm64(r, 0, (s32)(imm)) +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm) + +#define emith_move_r_imm_s8_patchable(r, imm) do { \ + if ((s8)(imm) < 0) \ + EMIT(A64_MOVN_IMM(r, ~(s8)(imm), 0)); \ + else \ + EMIT(A64_MOVZ_IMM(r, (s8)(imm), 0)); \ +} while (0) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u32 *ptr_ = (u32 *)ptr; \ + int r_ = *ptr_ & 0x1f; \ + if ((s8)(imm) < 0) \ + EMIT_PTR(ptr_, A64_MOVN_IMM(r_, ~(s8)(imm), 0)); \ + else \ + EMIT_PTR(ptr_, A64_MOVZ_IMM(r_, (s8)(imm), 0)); \ +} while (0) + +// arithmetic, immediate +static void emith_arith_imm(int op, int wx, int rd, int rn, s32 imm) +{ + u32 sz64 = wx ? 
OP_SZ64:0; + + if (imm < 0) { + op ^= (OP_ADD ^ OP_SUB); + imm = -imm; + } + if (imm == 0) { + // value 0, must emit if op is *S or source isn't dest + if ((op & 1) || rd != rn) + EMIT(sz64|A64_OP_IMM12(op, rd, rn, 0, 0)); + } else if (imm >> 24) { + // value too large + int _t = rcache_get_tmp(); + emith_move_r_imm(_t, imm); + EMIT(sz64|A64_OP_REG(op, 0, rd, rn, _t, ST_LSL, 0)); + rcache_free_tmp(_t); + } else { + int rs = rn; + if ((imm) & 0x000fff) { + EMIT(sz64|A64_OP_IMM12(op, rd, rs, imm, 0)); rs = rd; + } + if ((imm) & 0xfff000) { + EMIT(sz64|A64_OP_IMM12(op, rd, rs, imm >>12, 1)); + } + } +} + +#define emith_add_r_imm(r, imm) \ + emith_arith_imm(OP_ADD, 0, r, r, imm) +#define emith_add_r_imm_c(cond, r, imm) \ + emith_add_r_imm(r, imm) + +#define emith_addf_r_imm(r, imm) \ + emith_arith_imm(OP_ADDS, 0, r, r, imm) + +#define emith_sub_r_imm(r, imm) \ + emith_arith_imm(OP_SUB, 0, r, r, imm) +#define emith_sub_r_imm_c(cond, r, imm) \ + emith_sub_r_imm(r, imm) + +#define emith_subf_r_imm(r, imm) \ + emith_arith_imm(OP_SUBS, 0, r, r, imm) + + +#define emith_adc_r_imm(r, imm) do { \ + int _t = rcache_get_tmp(); \ + emith_move_r_imm(_t, imm); \ + emith_adc_r_r(r, _t); \ + rcache_free_tmp(_t); \ +} while (0) + +#define emith_adcf_r_imm(r, imm) do { \ + int _t = rcache_get_tmp(); \ + emith_move_r_imm(_t, imm); \ + emith_adcf_r_r(r, _t); \ + rcache_free_tmp(_t); \ +} while (0) + +#define emith_cmp_r_imm(r, imm) do { \ + u32 op_ = OP_SUBS, imm_ = (u8)imm; \ + if ((s8)imm_ < 0) { \ + imm_ = (u8)-imm_; \ + op_ = OP_ADDS; \ + } \ + EMIT(A64_OP_IMM12(op_, Z0, r, imm_, 0)); \ +} while (0) + + +#define emith_add_r_r_ptr_imm(d, s, imm) \ + emith_arith_imm(OP_ADD, 1, d, s, imm) + +#define emith_add_r_r_imm(d, s, imm) \ + emith_arith_imm(OP_ADD, 0, d, s, imm) + +#define emith_sub_r_r_imm(d, s, imm) \ + emith_arith_imm(OP_SUB, 0, d, s, imm) +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_sub_r_r_imm(d, s, imm) + +#define emith_subf_r_r_imm(d, s, imm) \ + 
emith_arith_imm(OP_SUBS, 0, d, s, imm) + + +// logical, immediate; the value describes a bitmask, see ARMv8 ArchRefMan +// NB: deal only with simple masks 0{n}1{m}0{o} or 1{n}0{m}1{o}, 0 16) { + emith_move_r_imm(_t, ~imm); + EMIT(sz64|A64_OP_REG(op, 1, rd, rn, _t, ST_LSL, 0)); + } else { + emith_move_r_imm(_t, imm); + EMIT(sz64|A64_OP_REG(op, 0, rd, rn, _t, ST_LSL, 0)); + } + rcache_free_tmp(_t); + } +} + +#define emith_and_r_imm(r, imm) \ + emith_log_imm(OP_AND, 0, r, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_log_imm(OP_OR, 0, r, r, imm) +#define emith_or_r_imm_c(cond, r, imm) \ + emith_or_r_imm(r, imm) + +#define emith_eor_r_imm_ptr(r, imm) \ + emith_log_imm(OP_EOR, 1, r, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_ptr(r, imm) + +#define emith_eor_r_imm(r, imm) \ + emith_log_imm(OP_EOR, 0, r, r, imm) +#define emith_eor_r_imm_c(cond, r, imm) \ + emith_eor_r_imm(r, imm) + +/* NB: BIC #imm not available in A64; use AND #~imm instead */ +#define emith_bic_r_imm(r, imm) \ + emith_log_imm(OP_AND, 0, r, r, ~(imm)) +#define emith_bic_r_imm_c(cond, r, imm) \ + emith_bic_r_imm(r, imm) + +#define emith_tst_r_imm(r, imm) \ + emith_log_imm(OP_ANDS, 0, Z0, r, imm) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_tst_r_imm(r, imm) + +#define emith_and_r_r_imm(d, s, imm) \ + emith_log_imm(OP_AND, 0, d, s, imm) + +#define emith_or_r_r_imm(d, s, imm) \ + emith_log_imm(OP_OR, 0, d, s, imm) + +#define emith_eor_r_r_imm(d, s, imm) \ + emith_log_imm(OP_EOR, 0, d, s, imm) + + +// shift +#define emith_lsl(d, s, cnt) \ + EMIT(A64_LSL_IMM(d, s, cnt)) + +#define emith_lsr(d, s, cnt) \ + EMIT(A64_LSR_IMM(d, s, cnt)) + +#define emith_asr(d, s, cnt) \ + EMIT(A64_ASR_IMM(d, s, cnt)) + +#define emith_ror(d, s, cnt) \ + EMIT(A64_ROR_IMM(d, s, cnt)) +#define emith_ror_c(cond, d, s, cnt) \ + emith_ror(d, s, cnt) + +#define emith_rol(d, s, cnt) \ + EMIT(A64_ROR_IMM(d, s, 32-(cnt))) + +// NB: shift with carry not directly supported in A64 :-|. 
+#define emith_lslf(d, s, cnt) do { \ + if ((cnt) > 1) { \ + emith_lsl(d, s, cnt-1); \ + emith_addf_r_r_r(d, d, d); \ + } else if ((cnt) > 0) \ + emith_addf_r_r_r(d, s, s); \ +} while (0) + +#define emith_lsrf(d, s, cnt) do { \ + EMIT(A64_RBIT_REG(d, s)); \ + emith_lslf(d, d, cnt); \ + EMIT(A64_RBIT_REG(d, d)); \ +} while (0) + +#define emith_asrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_asr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_addf_r_r_r(Z0, _s, _s); \ + EMIT(A64_RBIT_REG(d, _s)); \ + emith_adcf_r_r_r(d, d, d); \ + EMIT(A64_RBIT_REG(d, d)); \ + } \ +} while (0) + +#define emith_rolf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_rol(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_addf_r_r_r(d, _s, _s); \ + emith_adc_r_r_r(d, d, Z0); \ + } \ +} while (0) + +#define emith_rorf(d, s, cnt) do { \ + if ((cnt) > 0) { \ + emith_ror(d, s, cnt); \ + emith_addf_r_r_r(Z0, d, d); \ + } \ +} while (0) + +#define emith_rolcf(d) \ + emith_adcf_r_r(d, d) +#define emith_rolc(d) \ + emith_adc_r_r(d, d) + +#define emith_rorcf(d) do { \ + EMIT(A64_RBIT_REG(d, d)); \ + emith_adcf_r_r(d, d); \ + EMIT(A64_RBIT_REG(d, d)); \ +} while (0) +#define emith_rorc(d) do { \ + EMIT(A64_RBIT_REG(d, d)); \ + emith_adc_r_r(d, d); \ + EMIT(A64_RBIT_REG(d, d)); \ +} while (0) + +// signed/unsigned extend +#define emith_clear_msb(d, s, count) /* bits to clear */ \ + EMIT(A64_UXT_IMM(d, s, 32-(count))) +#define emith_clear_msb_c(cond, d, s, count) \ + emith_clear_msb(d, s, count) + +#define emith_sext(d, s, count) /* bits to keep */ \ + EMIT(A64_SXT_IMM(d, s, count)) + +// multiply Rd = Rn*Rm (+ Ra) +#define emith_mul(d, s1, s2) \ + EMIT(A64_MUL(d, s1, s2)) + +// NB: must combine/split Xd from/into 2 Wd's; play safe and clear upper bits +#define emith_combine64(dlo, dhi) \ + EMIT(A64_UXTX_IMM(dlo, dlo, 32)); \ + EMIT(A64_ORX_REG(dlo, dlo, dhi, ST_LSL, 32)); + +#define emith_split64(dlo, dhi) \ + EMIT(A64_LSRX_IMM(dhi, dlo, 
32)); \ + EMIT(A64_UXTX_IMM(dlo, dlo, 32)); + +#define emith_mul_u64(dlo, dhi, s1, s2) do { \ + EMIT(A64_UMULL(dlo, s1, s2)); \ + emith_split64(dlo, dhi); \ +} while (0) + +#define emith_mul_s64(dlo, dhi, s1, s2) do { \ + EMIT(A64_SMULL(dlo, s1, s2)); \ + emith_split64(dlo, dhi); \ +} while (0) + +#define emith_mula_s64(dlo, dhi, s1, s2) do { \ + emith_combine64(dlo, dhi); \ + EMIT(A64_SMADDL(dlo, s1, s2, dlo)); \ + emith_split64(dlo, dhi); \ +} while (0) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + emith_mula_s64(dlo, dhi, s1, s2) + +// load/store. offs has 9 bits signed, hence larger offs may use a temp +static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) +{ + if (o9 >= -256 && o9 < 256) { + EMIT(A64_OP_LDST(sz, ld, A64_LDST_AM(0,_,o9), mode, rn, rd)); + } else if (mode == AM_IDXPRE) { + emith_add_r_r_ptr_imm(rn, rn, o9); + EMIT(A64_OP_LDST(sz, ld, A64_LDST_AM(0,_,0), AM_IDX, rn, rd)); + } else if (mode == AM_IDXPOST) { + EMIT(A64_OP_LDST(sz, ld, A64_LDST_AM(0,_,0), AM_IDX, rn, rd)); + emith_add_r_r_ptr_imm(rn, rn, o9); + } else { + int _t = rcache_get_tmp(); + emith_add_r_r_ptr_imm(_t, rn, o9); + EMIT(A64_OP_LDST(sz, ld, A64_LDST_AM(0,_,0), AM_IDX, _t, rd)); + rcache_free_tmp(_t); + } +} + +#define emith_read_r_r_offs_ptr(r, rs, offs) \ + emith_ldst_offs(AM_X, r, rs, offs, LT_LD, AM_IDX) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) + +#define emith_read_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_W, r, rs, offs, LT_LD, AM_IDX) +#define emith_read_r_r_offs_c(cond, r, rs, offs) \ + emith_read_r_r_offs(r, rs, offs) + +#define emith_read_r_r_r_ptr(r, rs, rm) \ + EMIT(A64_LDSTX_REG(r, rs, rm, LT_LD, XT_SXTW)) + +#define emith_read_r_r_r(r, rs, rm) \ + EMIT(A64_LDST_REG(r, rs, rm, LT_LD, XT_SXTW)) +#define emith_read_r_r_r_c(cond, r, rs, rm) \ + emith_read_r_r_r(r, rs, rm) + +#define emith_read8_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_B, r, rs, offs, LT_LD, AM_IDX) +#define 
emith_read8_r_r_offs_c(cond, r, rs, offs) \ + emith_read8_r_r_offs(r, rs, offs) + +#define emith_read8_r_r_r(r, rs, rm) \ + EMIT(A64_LDSTB_REG(r, rs, rm, LT_LD, XT_SXTW)) +#define emith_read8_r_r_r_c(cond, r, rs, rm) \ + emith_read8_r_r_r(r, rs, rm) + +#define emith_read16_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_H, r, rs, offs, LT_LD, AM_IDX) +#define emith_read16_r_r_offs_c(cond, r, rs, offs) \ + emith_read16_r_r_offs(r, rs, offs) + +#define emith_read16_r_r_r(r, rs, rm) \ + EMIT(A64_LDSTH_REG(r, rs, rm, LT_LD, XT_SXTW)) +#define emith_read16_r_r_r_c(cond, r, rs, rm) \ + emith_read16_r_r_r(r, rs, rm) + +#define emith_read8s_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_B, r, rs, offs, LT_LDS, AM_IDX) +#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \ + emith_read8s_r_r_offs(r, rs, offs) + +#define emith_read8s_r_r_r(r, rs, rm) \ + EMIT(A64_LDSTB_REG(r, rs, rm, LT_LDS, XT_SXTW)) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + emith_read8s_r_r_r(r, rs, rm) + +#define emith_read16s_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_H, r, rs, offs, LT_LDS, AM_IDX) +#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \ + emith_read16s_r_r_offs(r, rs, offs) + +#define emith_read16s_r_r_r(r, rs, rm) \ + EMIT(A64_LDSTH_REG(r, rs, rm, LT_LDS, XT_SXTW)) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + emith_read16s_r_r_r(r, rs, rm) + + +#define emith_write_r_r_offs_ptr(r, rs, offs) \ + emith_ldst_offs(AM_X, r, rs, offs, LT_ST, AM_IDX) +#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_write_r_r_offs_ptr(r, rs, offs) + +#define emith_write_r_r_r_ptr(r, rs, rm) \ + EMIT(A64_LDSTX_REG(r, rs, rm, LT_ST, XT_SXTW)) +#define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \ + emith_write_r_r_r_ptr(r, rs, rm) + +#define emith_write_r_r_offs(r, rs, offs) \ + emith_ldst_offs(AM_W, r, rs, offs, LT_ST, AM_IDX) +#define emith_write_r_r_offs_c(cond, r, rs, offs) \ + emith_write_r_r_offs(r, rs, offs) + +#define emith_write_r_r_r(r, rs, rm) \ + EMIT(A64_LDST_REG(r, rs, rm, 
LT_ST, XT_SXTW)) +#define emith_write_r_r_r_c(cond, r, rs, rm) \ + emith_write_r_r_r(r, rs, rm) + +#define emith_ctx_read_ptr(r, offs) \ + emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_read(r, offs) \ + emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) + +#define emith_ctx_write_ptr(r, offs) \ + emith_write_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_write(r, offs) \ + emith_write_r_r_offs(r, CONTEXT_REG, offs) + +#define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_read(r_, offs_); \ +} while (0) + +#define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_write(r_, offs_); \ +} while (0) + +// push pairs; NB: SP must be 16 byte aligned (HW requirement!) +#define emith_push2(r1, r2) \ + EMIT(A64_LDSTPX_IMM(SP, r1, r2, -2*8, LT_ST, AM_IDXPRE)) +#define emith_pop2(r1, r2) \ + EMIT(A64_LDSTPX_IMM(SP, r1, r2, 2*8, LT_LD, AM_IDXPOST)) + +// function call handling; NB: mask is parenthesized so expression arguments are masked as a whole +#define emith_save_caller_regs(mask) do { \ + int _c, _r1, _r2; u32 _m = (mask) & 0x3ffff; \ + if (__builtin_parity(_m) == 1) _m |= 0x40000; /* hardware align */ \ + for (_c = HOST_REGS-1, _r1 = -1; _m && _c >= 0; _m &= ~(1 << _c), _c--)\ + if (_m & (1 << _c)) { \ + _r2 = _r1, _r1 = _c; \ + if (_r2 != -1) { \ + emith_push2(_r1, _r2); \ + _r1 = -1; \ + } \ + } \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + int _c, _r1, _r2; u32 _m = (mask) & 0x3ffff; \ + if (__builtin_parity(_m) == 1) _m |= 0x40000; /* hardware align */ \ + for (_c = 0, _r1 = -1; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) { \ + _r2 = _r1, _r1 = _c; \ + if (_r2 != -1) { \ + emith_pop2(_r2, _r1); \ + _r1 = -1; \ + } \ + } \ +} while (0) + +#define host_call(addr, args) \ + addr + +#define host_arg2reg(rd, 
arg) \ + rd = arg + +#define emith_pass_arg_r(arg, reg) \ + emith_move_r_r_ptr(arg, reg) + +#define emith_pass_arg_imm(arg, imm) \ + emith_move_r_ptr_imm(arg, imm) + +// branching; NB: A64 B.cond has only +/- 1MB range + +#define emith_jump(target) do {\ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(A64_B(disp_ & 0x0fffffff)); \ +} while (0) + +#define emith_jump_patchable(target) \ + emith_jump(target) + +#define emith_jump_cond(cond, target) do { \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(A64_BCOND(cond, disp_ & 0x001fffff)); \ +} while (0) + +#define emith_jump_cond_patchable(cond, target) \ + emith_jump_cond(cond, target) + +#define emith_jump_cond_inrange(target) \ + !(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 21) + +#define emith_jump_patch(ptr, target, pos) do { \ + u32 *ptr_ = (u32 *)ptr; \ + u32 disp_ = (u8 *)target - (u8 *)ptr, mask_; \ + if ((*ptr_ & 0xff000000) == 0x54000000) \ + mask_ = 0xff00001f, disp_ <<= 5; /* B.cond, range 21 bit */ \ + else mask_ = 0xfc000000; /* B[L], range 28 bit */ \ + EMIT_PTR(ptr_, (*ptr_ & mask_) | ((disp_ >> 2) & ~mask_)); \ + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ +} while (0) + +#define emith_jump_patch_inrange(ptr, target) \ + !(((u8 *)target - (u8 *)ptr + 0x100000) >> 21) +#define emith_jump_patch_size() 4 + +#define emith_jump_at(ptr, target) do { \ + u32 *ptr_ = (u32 *)ptr; \ + u32 disp_ = (u8 *)target - (u8 *)ptr; \ + EMIT_PTR(ptr_, A64_B(disp_ & 0x0fffffff)); \ +} while (0) +#define emith_jump_at_size() 4 + +#define emith_jump_reg(r) \ + EMIT(A64_BR(r)) +#define emith_jump_reg_c(cond, r) \ + emith_jump_reg(r) + +#define emith_jump_ctx(offs) do { \ + int _t = rcache_get_tmp(); \ + emith_ctx_read_ptr(_t, offs); \ + emith_jump_reg(_t); \ + rcache_free_tmp(_t); \ +} while (0) +#define emith_jump_ctx_c(cond, offs) \ + emith_jump_ctx(offs) + +#define emith_call(target) do { \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(A64_BL(disp_ & 0x0fffffff)); \ +} 
while (0) +#define emith_call_cond(cond, target) \ + emith_call(target) + +#define emith_call_reg(r) \ + EMIT(A64_BLR(r)) + +#define emith_abicall_ctx(offs) do { \ + int _t = rcache_get_tmp(); \ + emith_ctx_read_ptr(_t, offs); \ + emith_call_reg(_t); \ + rcache_free_tmp(_t); \ +} while (0) + +#define emith_abijump_reg(r) \ + emith_jump_reg(r) +#define emith_abijump_reg_c(cond, r) \ + emith_abijump_reg(r) +#define emith_abicall(target) \ + emith_call(target) +#define emith_abicall_cond(cond, target) \ + emith_abicall(target) +#define emith_abicall_reg(r) \ + emith_call_reg(r) + +#define emith_call_cleanup() /**/ + +#define emith_ret() \ + EMIT(A64_RET(LR)) +#define emith_ret_c(cond) \ + emith_ret() + +#define emith_ret_to_ctx(offs) \ + emith_ctx_write_ptr(LR, offs) + +#define emith_add_r_ret(r) \ + emith_add_r_r_r_ptr(r, LR, r) + +// NB: pushes r or r18 for SP hardware alignment +#define emith_push_ret(r) do { \ + int r_ = (r >= 0 ? r : 18); \ + emith_push2(r_, LR); \ +} while (0) + +#define emith_pop_and_ret(r) do { \ + int r_ = (r >= 0 ? 
r : 18); \ + emith_pop2(r_, LR); \ + emith_ret(); \ +} while (0) + + +// emitter ABI stuff +#define emith_pool_check() /**/ +#define emith_pool_commit(j) /**/ +#define emith_insn_ptr() ((u8 *)tcache_ptr) +#define emith_flush() /**/ +#define host_instructions_updated(base, end, force) \ + do { if (force) __builtin___clear_cache(base, end); } while (0) +#define emith_update_cache() /**/ +#define emith_rw_offs_max() 0x1ff +#define emith_uext_ptr(r) /**/ + + +// SH2 drc specific +#define emith_sh2_drc_entry() do { \ + emith_push2(LR, FP); \ + emith_push2(28, 27); \ + emith_push2(26, 25); \ + emith_push2(24, 23); \ + emith_push2(22, 21); \ + emith_push2(20, 19); \ +} while (0) +#define emith_sh2_drc_exit() do { \ + emith_pop2(20, 19); \ + emith_pop2(22, 21); \ + emith_pop2(24, 23); \ + emith_pop2(26, 25); \ + emith_pop2(28, 27); \ + emith_pop2(LR, FP); \ + emith_ret(); \ +} while (0) + +// NB: assumes a is in arg0, tab, func and mask are temp +#define emith_sh2_rcall(a, tab, func, mask) do { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + EMIT(A64_ADDX_REG(tab, tab, mask, ST_LSL, 4)); \ + emith_read_r_r_offs_ptr(func, tab, 0); \ + emith_read_r_r_offs(mask, tab, 8); \ + EMIT(A64_ADDXS_REG(func, func, func, ST_LSL, 0)); \ +} while (0) + +// NB: assumes a, val are in arg0 and arg1, tab and func are temp +#define emith_sh2_wcall(a, val, tab, func) do { \ + emith_lsr(func, a, SH2_WRITE_SHIFT); \ + emith_lsl(func, func, 3); \ + emith_read_r_r_r_ptr(func, tab, func); \ + emith_move_r_r_ptr(2, CONTEXT_REG); /* arg2 */ \ + emith_abijump_reg(func); \ +} while (0) + +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_cmp_r_imm(sr, 0); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ + emith_mul_u64(t1, 
t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +/* + * T = carry(Rn = (Rn << 1) | T) + * if Q + * t = !carry(Rn += Rm) + * else + * t = !carry(Rn -= Rm) + * T ^= t + */ +#define emith_sh2_div1_step(rn, rm, sr) do { \ + int tmp_ = rcache_get_tmp(); \ + emith_tpop_carry(sr, 0); \ + emith_adcf_r_r_r(rn, rn, rn); \ + emith_tpush_carry(sr, 0); \ + emith_tst_r_imm(sr, Q); \ + EMITH_SJMP3_START(DCOND_EQ); \ + emith_addf_r_r(rn, rm); \ + emith_adc_r_r_r(tmp_, Z0, Z0); \ + emith_eor_r_imm(tmp_, 1); \ + EMITH_SJMP3_MID(DCOND_EQ); \ + emith_subf_r_r(rn, rm); \ + emith_adc_r_r_r(tmp_, Z0, Z0); \ + EMITH_SJMP3_END(); \ + emith_eor_r_r(sr, tmp_); \ + rcache_free_tmp(tmp_); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH >> 31 to MACH >> 15. 
this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_addf_r_r_r_lsr(rn, rn, mh, 31); \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_addf_r_r_r_lsr(rn, mh, ml, 31); /* sum = MACH + (MACL>>31) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +#define emith_write_sr(sr, srcr) \ + EMIT(A64_BFI_IMM(sr, srcr, 0, 10)) + +#define emith_carry_to_t(srr, is_sub) do { /* shift bit 0 out of srr, then shift the carry flag in */ \ + emith_lsr(srr, srr, 1); \ + emith_adc_r_r(srr, srr); \ + if (is_sub) /* SUB has inverted C on ARM */ \ + emith_eor_r_imm(srr, 1); \ +} while (0) + +#define emith_t_to_carry(srr, is_sub) do { \ + if (is_sub) { \ + int t_ = rcache_get_tmp(); \ + emith_eor_r_r_imm(t_, srr, 1); \ + emith_rorf(t_, t_, 1); \ + rcache_free_tmp(t_); \ + } else { \ + emith_rorf(srr, srr, 1); \ + emith_rol(srr, srr, 1); \ + } \ +} while (0) + +#define emith_tpop_carry(sr, is_sub) do { \ + if (is_sub) \ + emith_eor_r_imm(sr, 1); \ + emith_ror(sr, sr, 1); \ + emith_addf_r_r(sr, sr); \ +} while (0) + +#define emith_tpush_carry(sr, is_sub) do { \ + emith_adc_r_r(sr, Z0); \ + if (is_sub) \ + emith_eor_r_imm(sr, 1); \ +} while (0) + +#ifdef T +// T bit handling +static int tcond = -1; + +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +#define emith_clr_t_cond(sr) \ + (void)sr + +#define emith_set_t_cond(sr, cond) \ + tcond = cond + +#define emith_get_t_cond() \ + tcond + +#define emith_invalidate_t() \ + tcond = -1 + +#define emith_set_t(sr, val) \ + tcond = ((val) ? A64_COND_AL: A64_COND_NV) + +static void emith_sync_t(int sr) +{ + if (tcond == A64_COND_AL) + emith_or_r_imm(sr, T); + else if (tcond == A64_COND_NV) + emith_bic_r_imm(sr, T); + else if (tcond >= 0) { + int tmp = rcache_get_tmp(); + EMIT(A64_CSET(tcond, tmp)); + EMIT(A64_BFI_IMM(sr, tmp, __builtin_ffs(T)-1, 1)); + rcache_free_tmp(tmp); + } + tcond = -1; +} + +static int emith_tst_t(int sr, int tf) +{ + if (tcond < 0) { + emith_tst_r_imm(sr, T); + return tf ? 
DCOND_NE: DCOND_EQ; + } else if (tcond >= A64_COND_AL) { + // MUST sync because A64_COND_AL/NV isn't a real condition + emith_sync_t(sr); + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; + } else + return tf ? tcond : emith_invert_cond(tcond); +} +#endif diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c new file mode 100644 index 00000000..e09bcf87 --- /dev/null +++ b/cpu/drc/emit_mips.c @@ -0,0 +1,1969 @@ +/* + * Basic macros to emit MIPS32/MIPS64 Release 1 or 2 instructions and some utils + * Copyright (C) 2019-2024 irixxxx + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ +#define HOST_REGS 32 + +// MIPS32 ABI: params: r4-r7, return: r2-r3, temp: r1(at),r8-r15,r24-r25,r31(ra) +// saved: r16-r23,r30, reserved: r0(zero), r26-r27(irq), r28(gp), r29(sp) +// r1,r15,r24,r25(at,t7-t9) are used internally by the code emitter +// MIPSN32/MIPS64 ABI: params: r4-r11, no caller-reserved save area on stack +// for PIC code, on function calls r25(t9) must contain the called address +#define RET_REG 2 // v0 +#define PARAM_REGS { 4, 5, 6, 7 } // a0-a3 +#define PRESERVED_REGS { 16, 17, 18, 19, 20, 21, 22, 23 } // s0-s7 +#define TEMPORARY_REGS { 2, 3, 8, 9, 10, 11, 12, 13, 14 } // v0-v1,t0-t6 + +#define CONTEXT_REG 23 // s7 +#define STATIC_SH2_REGS { SHR_SR,22 , SHR_R(0),21 , SHR_R(1),20 } + +// NB: the ubiquitous JZ74[46]0 uses MIPS32 Release 1, a slight MIPS II superset +#ifndef __mips_isa_rev +#define __mips_isa_rev 1 // surprisingly not always defined +#endif + +// registers usable for user code: r1-r25, others reserved or special +#define Z0 0 // zero register +#define CR 25 // call register +#define GP 28 // global pointer +#define SP 29 // stack pointer +#define FP 30 // frame pointer +#define LR 31 // link register +// internally used by code emitter: +#define AT 1 // used to hold intermediate results +#define FNZ 15 // emulated processor flags: N (bit 31) ,Z (all bits) +#define FC 24 // 
emulated processor flags: C (bit 0), others 0 +#define FV 25 // emulated processor flags: Nt^Ns (bit 31). others x + +// All operations but ptr ops are using the lower 32 bits of the registers. +// The upper 32 bits always contain the sign extension from the lower 32 bits. + +// unified conditions; virtual, not corresponding to anything real on MIPS +#define DCOND_EQ 0x0 +#define DCOND_NE 0x1 +#define DCOND_HS 0x2 +#define DCOND_LO 0x3 +#define DCOND_MI 0x4 +#define DCOND_PL 0x5 +#define DCOND_VS 0x6 +#define DCOND_VC 0x7 +#define DCOND_HI 0x8 +#define DCOND_LS 0x9 +#define DCOND_GE 0xa +#define DCOND_LT 0xb +#define DCOND_GT 0xc +#define DCOND_LE 0xd + +#define DCOND_CS DCOND_LO +#define DCOND_CC DCOND_HS + +// unified insn +#define MIPS_INSN(op, rs, rt, rd, sa, fn) \ + (((op)<<26)|((rs)<<21)|((rt)<<16)|((rd)<<11)|((sa)<<6)|((fn)<<0)) + +#define _ 0 // marker for "field unused" +#define __(n) o##n // enum marker for "undefined" + +// opcode field (encoded in op) +enum { OP__FN=000, OP__RT, OP_J, OP_JAL, OP_BEQ, OP_BNE, OP_BLEZ, OP_BGTZ }; +enum { OP_ADDI=010, OP_ADDIU, OP_SLTI, OP_SLTIU, OP_ANDI, OP_ORI, OP_XORI, OP_LUI }; +enum { OP_DADDI=030, OP_DADDIU, OP_LDL, OP_LDR, OP__FN2=034, OP__FN3=037 }; +enum { OP_LB=040, OP_LH, OP_LWL, OP_LW, OP_LBU, OP_LHU, OP_LWR, OP_LWU }; +enum { OP_SB=050, OP_SH, OP_SWL, OP_SW, OP_SDL, OP_SDR, OP_SWR }; +enum { OP_LD=067, OP_SD=077 }; +// function field (encoded in fn if opcode = OP__FN) +enum { FN_SLL=000, __(01), FN_SRL, FN_SRA, FN_SLLV, __(05), FN_SRLV, FN_SRAV }; +enum { FN_JR=010, FN_JALR, FN_MOVZ, FN_MOVN, FN_SYNC=017 }; +enum { FN_MFHI=020, FN_MTHI, FN_MFLO, FN_MTLO, FN_DSSLV, __(25), FN_DSLRV, FN_DSRAV }; +enum { FN_MULT=030, FN_MULTU, FN_DIV, FN_DIVU, FN_DMULT, FN_DMULTU, FN_DDIV, FN_DDIVU }; +enum { FN_ADD=040, FN_ADDU, FN_SUB, FN_SUBU, FN_AND, FN_OR, FN_XOR, FN_NOR }; +enum { FN_SLT=052, FN_SLTU, FN_DADD, FN_DADDU, FN_DSUB, FN_DSUBU }; +enum { FN_DSLL=070, __(71), FN_DSRL, FN_DSRA, FN_DSLL32, __(75), FN_DSRL32, 
FN_DSRA32 }; +// function field (encoded in fn if opcode = OP__FN2) +enum { FN2_MADD=000, FN2_MADDU, FN2_MUL, __(03), FN2_MSUB, FN2_MSUBU }; +enum { FN2_CLZ=040, FN2_CLO, FN2_DCLZ=044, FN2_DCLO }; +// function field (encoded in fn if opcode = OP__FN3) +enum { FN3_EXT=000, FN3_DEXTM, FN3_DEXTU, FN3_DEXT, FN3_INS, FN3_DINSM, FN3_DINSU, FN3_DINS }; +enum { FN3_BSHFL=040, FN3_DBSHFL=044 }; +// rt field (encoded in rt if opcode = OP__RT) +enum { RT_BLTZ=000, RT_BGEZ, RT_BLTZAL=020, RT_BGEZAL, RT_SYNCI=037 }; + +// bit shuffle function (encoded in sa if function = FN3_BSHFL) +enum { BS_SBH=002, BS_SHD=005, BS_SEB=020, BS_SEH=030 }; +// r (rotate) bit function (encoded in rs/sa if function = FN_SRL/FN_SRLV) +enum { RB_SRL=0, RB_ROTR=1 }; + +#define MIPS_NOP 000 // null operation: SLL r0, r0, #0 + +// arithmetic/logical + +#define MIPS_OP_REG(op, sa, rd, rs, rt) \ + MIPS_INSN(OP__FN, rs, rt, rd, sa, op) // R-type, SPECIAL +#define MIPS_OP2_REG(op, sa, rd, rs, rt) \ + MIPS_INSN(OP__FN2, rs, rt, rd, sa, op) // R-type, SPECIAL2 +#define MIPS_OP3_REG(op, sa, rd, rs, rt) \ + MIPS_INSN(OP__FN3, rs, rt, rd, sa, op) // R-type, SPECIAL3 +#define MIPS_OP_IMM(op, rt, rs, imm) \ + MIPS_INSN(op, rs, rt, _, _, (u16)(imm)) // I-type + +// rd = rs OP rt +#define MIPS_ADD_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_ADDU,_, rd, rs, rt) +#define MIPS_DADD_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_DADDU,_, rd, rs, rt) +#define MIPS_SUB_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_SUBU,_, rd, rs, rt) +#define MIPS_DSUB_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_DSUBU,_, rd, rs, rt) + +#define MIPS_NEG_REG(rd, rt) \ + MIPS_SUB_REG(rd, Z0, rt) + +#define MIPS_XOR_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_XOR,_, rd, rs, rt) +#define MIPS_OR_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_OR,_, rd, rs, rt) +#define MIPS_AND_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_AND,_, rd, rs, rt) +#define MIPS_NOR_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_NOR,_, rd, rs, rt) + +#define MIPS_MOVE_REG(rd, rs) \ + MIPS_OR_REG(rd, rs, Z0) +#define MIPS_MVN_REG(rd, rs) \ + 
MIPS_NOR_REG(rd, rs, Z0) + +// rd = rt SHIFT rs +#define MIPS_LSL_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SLLV,_, rd, rs, rt) +#define MIPS_LSR_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SRLV,RB_SRL, rd, rs, rt) +#define MIPS_ASR_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SRAV,_, rd, rs, rt) +#define MIPS_ROR_REG(rd, rt, rs) \ + MIPS_OP_REG(FN_SRLV,RB_ROTR, rd, rs, rt) + +#define MIPS_SEB_REG(rd, rt) \ + MIPS_OP3_REG(FN3_BSHFL, BS_SEB, rd, _, rt) +#define MIPS_SEH_REG(rd, rt) \ + MIPS_OP3_REG(FN3_BSHFL, BS_SEH, rd, _, rt) + +#define MIPS_EXT_IMM(rt, rs, lsb, sz) \ + MIPS_OP3_REG(FN3_EXT, lsb, (sz)-1, rs, rt) +#define MIPS_INS_IMM(rt, rs, lsb, sz) \ + MIPS_OP3_REG(FN3_INS, lsb, (lsb)+(sz)-1, rs, rt) + +// rd = (rs < rt) +#define MIPS_SLT_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_SLT,_, rd, rs, rt) +#define MIPS_SLTU_REG(rd, rs, rt) \ + MIPS_OP_REG(FN_SLTU,_, rd, rs, rt) + +// rt = rs OP imm16 +#define MIPS_ADD_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_ADDIU, rt, rs, imm16) +#define MIPS_DADD_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_DADDIU, rt, rs, imm16) + +#define MIPS_XOR_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_XORI, rt, rs, imm16) +#define MIPS_OR_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_ORI, rt, rs, imm16) +#define MIPS_AND_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_ANDI, rt, rs, imm16) + +// rt = (imm16 << (0|16)) +#define MIPS_MOV_IMM(rt, imm16) \ + MIPS_OP_IMM(OP_ORI, rt, Z0, imm16) +#define MIPS_MOVT_IMM(rt, imm16) \ + MIPS_OP_IMM(OP_LUI, rt, _, imm16) + +// rd = rt SHIFT imm5 +#define MIPS_LSL_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SLL) +#define MIPS_LSR_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, RB_SRL, rt, rd, bits, FN_SRL) +#define MIPS_ASR_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_SRA) +#define MIPS_ROR_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, RB_ROTR, rt, rd, bits, FN_SRL) + +#define MIPS_DLSL_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL) +#define MIPS_DLSL32_IMM(rd, rt, bits) \ + MIPS_INSN(OP__FN, _, rt, rd, bits, FN_DSLL32) + +// rt = (rs < 
imm16) +#define MIPS_SLT_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_SLTI, rt, rs, imm16) +#define MIPS_SLTU_IMM(rt, rs, imm16) \ + MIPS_OP_IMM(OP_SLTIU, rt, rs, imm16) + +// multiplication + +#define MIPS_MULT(rt, rs) \ + MIPS_OP_REG(FN_MULT,_, _, rs, rt) +#define MIPS_MULTU(rt, rs) \ + MIPS_OP_REG(FN_MULTU,_, _, rs, rt) +#define MIPS_MADD(rt, rs) \ + MIPS_OP2_REG(FN_MADD,_, _, rs, rt) +#define MIPS_MADDU(rt, rs) \ + MIPS_OP2_REG(FN_MADDU,_, _, rs, rt) +#define MIPS_MFLO(rd) \ + MIPS_OP_REG(FN_MFLO,_, rd, _, _) +#define MIPS_MFHI(rd) \ + MIPS_OP_REG(FN_MFHI,_, rd, _, _) + +// branching + +#define MIPS_J(abs26) \ + MIPS_INSN(OP_J, _,_,_,_, (abs26) >> 2) // J-type +#define MIPS_JAL(abs26) \ + MIPS_INSN(OP_JAL, _,_,_,_, (abs26) >> 2) +#define MIPS_JR(rs) \ + MIPS_OP_REG(FN_JR,_, _,rs,_) +#define MIPS_JALR(rd, rs) \ + MIPS_OP_REG(FN_JALR,_, rd,rs,_) + +// conditional branches; no condition code, these compare rs against rt or Z0 +#define MIPS_BEQ (OP_BEQ << 5) // rs == rt (rt in lower 5 bits) +#define MIPS_BNE (OP_BNE << 5) // rs != rt (ditto) +#define MIPS_BLE (OP_BLEZ << 5) // rs <= 0 +#define MIPS_BGT (OP_BGTZ << 5) // rs > 0 +#define MIPS_BLT ((OP__RT << 5)|RT_BLTZ) // rs < 0 +#define MIPS_BGE ((OP__RT << 5)|RT_BGEZ) // rs >= 0 +#define MIPS_BLTL ((OP__RT << 5)|RT_BLTZAL) // rs < 0, always link $ra +#define MIPS_BGEL ((OP__RT << 5)|RT_BGEZAL) // rs >= 0, always link $ra + +// cond is one of the MIPS_Bxx values above: opcode in bits 5 and up; BCOND takes rt as an argument, BCONDZ takes the rt field from the low 5 bits of cond +#define MIPS_BCOND(cond, rs, rt, offs16) \ + MIPS_OP_IMM((cond >> 5), rt, rs, (offs16) >> 2) +#define MIPS_BCONDZ(cond, rs, offs16) \ + MIPS_OP_IMM((cond >> 5), (cond & 0x1f), rs, (offs16) >> 2) +#define MIPS_B(offs16) \ + MIPS_BCONDZ(MIPS_BEQ, Z0, offs16) +#define MIPS_BL(offs16) \ + MIPS_BCONDZ(MIPS_BGEL, Z0, offs16) + +// load/store indexed base + +#define MIPS_LD(rt, rs, offs16) \ + MIPS_OP_IMM(OP_LD, rt, rs, (u16)(offs16)) +#define MIPS_LW(rt, rs, offs16) \ + MIPS_OP_IMM(OP_LW, rt, rs, (u16)(offs16)) +#define MIPS_LH(rt, rs, offs16) \ + MIPS_OP_IMM(OP_LH, rt, rs, (u16)(offs16)) +#define 
MIPS_LB(rt, rs, offs16) \ + MIPS_OP_IMM(OP_LB, rt, rs, (u16)(offs16)) +#define MIPS_LHU(rt, rs, offs16) \ + MIPS_OP_IMM(OP_LHU, rt, rs, (u16)(offs16)) +#define MIPS_LBU(rt, rs, offs16) \ + MIPS_OP_IMM(OP_LBU, rt, rs, (u16)(offs16)) + +#define MIPS_SD(rt, rs, offs16) \ + MIPS_OP_IMM(OP_SD, rt, rs, (u16)(offs16)) +#define MIPS_SW(rt, rs, offs16) \ + MIPS_OP_IMM(OP_SW, rt, rs, (u16)(offs16)) +#define MIPS_SH(rt, rs, offs16) \ + MIPS_OP_IMM(OP_SH, rt, rs, (u16)(offs16)) +#define MIPS_SB(rt, rs, offs16) \ + MIPS_OP_IMM(OP_SB, rt, rs, (u16)(offs16)) + +// pointer operations + +#if _MIPS_SZPTR == 64 +#define OP_LP OP_LD +#define OP_SP OP_SD +#define OP_PADDIU OP_DADDIU +#define FN_PADDU FN_DADDU +#define FN_PSUBU FN_DSUBU +#define PTR_SCALE 3 +#else +#define OP_LP OP_LW +#define OP_SP OP_SW +#define OP_PADDIU OP_ADDIU +#define FN_PADDU FN_ADDU +#define FN_PSUBU FN_SUBU +#define PTR_SCALE 2 +#endif +#define PTR_SIZE (1< 0) { \ + u32 *p = (u32 *)tcache_ptr - emith_last_cnt; \ + int idx = (emith_last_idx - emith_last_cnt+1) %FSZ; \ + EMIT_PTR(p, emith_last_insns[idx]);\ + emith_last_cnt --; \ + } \ + } while (0) + +#define EMIT(op) \ + do { \ + if (emith_last_cnt >= FSZ) EMIT_PUSHOP(); \ + tcache_ptr = (void *)((u32 *)tcache_ptr + 1); \ + emith_last_idx = (emith_last_idx+1) %FSZ; \ + emith_last_insns[emith_last_idx] = op; \ + emith_last_cnt ++; \ + COUNT_OP; \ + } while (0) + +#define emith_flush() \ + do { \ + while (emith_last_cnt) EMIT_PUSHOP(); \ + emith_flg_hint = _FHV|_FHC; \ + } while (0) + +#define emith_insn_ptr() (u8 *)((u32 *)tcache_ptr - emith_last_cnt) + +// delay slot stuff +static int emith_is_j(u32 op) // J, JAL + { return ((op>>26) & 076) == OP_J; } +static int emith_is_jr(u32 op) // JR, JALR + { return (op>>26) == OP__FN && (op & 076) == FN_JR; } +static int emith_is_b(u32 op) // B + { return ((op>>26) & 074) == OP_BEQ || + ((op>>26) == OP__RT && ((op>>16) & 036) == RT_BLTZ); } +// register usage for dependency evaluation XXX better do this as in emit_arm? 
+static uint64_t emith_has_rs[5] = // OP__FN1-3, OP__RT, others + { 0x005ffcffffda0fd2ULL, 0x0000003300000037ULL, 0x00000000000000ffULL, + 0x800f5f0fUL, 0xf7ffffff0ff07ff0ULL }; +static uint64_t emith_has_rt[5] = // OP__FN1-3, OP__RT, others + { 0xdd5ffcffffd00cddULL, 0x0000000000000037ULL, 0x0000001100000000ULL, + 0x00000000UL, 0x80007f440c300030ULL }; +static uint64_t emith_has_rd[5] = // OP__FN1-3, OP__RT, others(rt instead of rd) + { 0xdd00fcff00d50edfULL, 0x0000003300000004ULL, 0x08000011000000ffULL, + 0x00000000UL, 0x119100ff0f00ff00ULL }; +#define emith_has_(rx,ix,op,sa,m) \ + (emith_has_##rx[ix] & (1ULL << (((op)>>(sa)) & (m)))) +static int emith_rs(u32 op) + { if ((op>>26) == OP__FN) + return emith_has_(rs,0,op, 0,0x3f) ? (op>>21)&0x1f : 0; + if ((op>>26) == OP__FN2) + return emith_has_(rs,1,op, 0,0x3f) ? (op>>21)&0x1f : 0; + if ((op>>26) == OP__FN3) + return emith_has_(rs,2,op, 0,0x3f) ? (op>>21)&0x1f : 0; + if ((op>>26) == OP__RT) + return emith_has_(rs,3,op,16,0x1f) ? (op>>21)&0x1f : 0; + return emith_has_(rs,4,op,26,0x3f) ? (op>>21)&0x1f : 0; + } +static int emith_rt(u32 op) + { if ((op>>26) == OP__FN) + return emith_has_(rt,0,op, 0,0x3f) ? (op>>16)&0x1f : 0; + if ((op>>26) == OP__FN2) + return emith_has_(rt,1,op, 0,0x3f) ? (op>>16)&0x1f : 0; + if ((op>>26) == OP__FN3) + return emith_has_(rt,2,op, 0,0x3f) ? (op>>16)&0x1f : 0; + if ((op>>26) == OP__RT) + return 0; + return emith_has_(rt,4,op,26,0x3f) ? (op>>16)&0x1f : 0; + } +static int emith_rd(u32 op) + { int ret = emith_has_(rd,4,op,26,0x3f) ? (op>>16)&0x1f :-1; + if ((op>>26) == OP__FN) + ret = emith_has_(rd,0,op, 0,0x3f) ? (op>>11)&0x1f :-1; + if ((op>>26) == OP__FN2) + ret = emith_has_(rd,1,op, 0,0x3f) ? (op>>11)&0x1f :-1; + if ((op>>26) == OP__FN3 && (op&0x3f) == FN3_BSHFL) + ret = emith_has_(rd,2,op, 0,0x3f) ? (op>>11)&0x1f :-1; + if ((op>>26) == OP__FN3 && (op&0x3f) != FN3_BSHFL) + ret = emith_has_(rd,2,op, 0,0x3f) ? (op>>16)&0x1f :-1; + if ((op>>26) == OP__RT) + ret = -1; + return (ret ? 
ret : -1); // Z0 doesn't have dependencies + } + +static int emith_b_isswap(u32 bop, u32 lop) +{ + if (emith_is_j(bop)) + return bop; + else if (emith_is_jr(bop) && emith_rd(lop) != emith_rs(bop)) + return bop; + else if (emith_is_b(bop) && emith_rd(lop) != emith_rs(bop) && + emith_rd(lop) != emith_rt(bop)) + if ((bop & 0xffff) != 0x7fff) // displacement overflow? + return (bop & 0xffff0000) | ((bop+1) & 0x0000ffff); + return 0; +} + +static int emith_insn_swappable(u32 op1, u32 op2) +{ + if (emith_rd(op1) != emith_rd(op2) && + emith_rs(op1) != emith_rd(op2) && emith_rt(op1) != emith_rd(op2) && + emith_rs(op2) != emith_rd(op1) && emith_rt(op2) != emith_rd(op1)) + return 1; + return 0; +} + +// emit branch, trying to fill the delay slot with one of the last insns +static void *emith_branch(u32 op) +{ + unsigned idx = emith_last_idx, ds = idx; + u32 bop = 0, sop; + void *bp; + int i, j, s; + + // check for ds insn; older mustn't interact with newer ones to overtake + for (i = 0; i < emith_last_cnt && !bop; i++) { + ds = (idx-i)%FSZ; + sop = emith_last_insns[ds]; + for (j = i, s = 1; j > 0 && s; j--) + s = emith_insn_swappable(emith_last_insns[(ds+j)%FSZ], sop); + if (s) + bop = emith_b_isswap(op, sop); + } + + // flush FIFO, but omit delay slot insn + tcache_ptr = (void *)((u32 *)tcache_ptr - emith_last_cnt); + idx = (idx-emith_last_cnt+1)%FSZ; + for (i = emith_last_cnt; i > 0; i--, idx = (idx+1)%FSZ) + if (!bop || idx != ds) + EMIT_PTR(tcache_ptr, emith_last_insns[idx]); + emith_last_cnt = 0; + // emit branch and delay slot + bp = tcache_ptr; + if (bop) { // can swap + EMIT_PTR(tcache_ptr, bop); COUNT_OP; + EMIT_PTR(tcache_ptr, emith_last_insns[ds]); + } else { // can't swap + EMIT_PTR(tcache_ptr, op); COUNT_OP; + EMIT_PTR(tcache_ptr, MIPS_NOP); COUNT_OP; + } + return bp; +} + +// if-then-else conditional execution helpers +#define JMP_POS(ptr) \ + ptr = emith_branch(MIPS_BCONDZ(cond_m, cond_r, 0)); + +#define JMP_EMIT(cond, ptr) { \ + u32 val_ = (u8 *)tcache_ptr - 
(u8 *)(ptr) - 4; \ + emith_flush(); /* prohibit delay slot switching across jump targets */ \ + EMIT_PTR(ptr, MIPS_BCONDZ(cond_m, cond_r, val_ & 0x0003ffff)); \ +} + +#define JMP_EMIT_NC(ptr) { \ + u32 val_ = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; \ + emith_flush(); \ + EMIT_PTR(ptr, MIPS_B(val_ & 0x0003ffff)); \ +} + +#define EMITH_JMP_START(cond) { \ + int cond_r, cond_m = emith_cond_check(cond, &cond_r); \ + u8 *cond_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP_END(cond) \ + JMP_EMIT(cond, cond_ptr); \ +} + +#define EMITH_JMP3_START(cond) { \ + int cond_r, cond_m = emith_cond_check(cond, &cond_r); \ + u8 *cond_ptr, *else_ptr; \ + JMP_POS(cond_ptr) + +#define EMITH_JMP3_MID(cond) \ + JMP_POS(else_ptr); \ + JMP_EMIT(cond, cond_ptr); + +#define EMITH_JMP3_END() \ + JMP_EMIT_NC(else_ptr); \ +} + +// "simple" jump (no more than a few insns) +// ARM32 will use conditional instructions here +#define EMITH_SJMP_START EMITH_JMP_START +#define EMITH_SJMP_END EMITH_JMP_END + +#define EMITH_SJMP3_START EMITH_JMP3_START +#define EMITH_SJMP3_MID EMITH_JMP3_MID +#define EMITH_SJMP3_END EMITH_JMP3_END + +#define EMITH_SJMP2_START(cond) \ + EMITH_SJMP3_START(cond) +#define EMITH_SJMP2_MID(cond) \ + EMITH_SJMP3_MID(cond) +#define EMITH_SJMP2_END(cond) \ + EMITH_SJMP3_END() + + +// flag register emulation. this is modelled after arm/x86. +// the FNZ register stores the result of the last flag setting operation for +// N and Z flag, used for EQ,NE,MI,PL branches. +// the FC register stores the C flag (used for HI,HS,LO,LS,CC,CS). +// the FV register stores information for V flag calculation (used for +// GT,GE,LT,LE,VC,VS). V flag is costly and only fully calculated when needed. +// the core registers may be temp registers, since the condition after calls +// is undefined anyway. + +// flag emulation creates 2 (ie cmp #0/beq) up to 9 (ie adcf/ble) extra insns. 
+// flag handling shortcuts may reduce this by 1-4 insns, see emith_cond_check() +static int emith_cmp_rs, emith_cmp_rt; // registers used in cmp_r_r/cmp_r_imm +static s32 emith_cmp_imm; // immediate value used in cmp_r_imm +enum { _FHC=1, _FHV=2 } emith_flg_hint; // C/V flag usage hinted by compiler +static int emith_flg_noV; // V flag known not to be set + +#define EMITH_HINT_COND(cond) do { \ + /* only need to check cond>>1 since the lowest bit inverts the cond */ \ + unsigned _mv = BITMASK3(DCOND_VS>>1,DCOND_GE>>1,DCOND_GT>>1); \ + unsigned _mc = _mv | BITMASK2(DCOND_HS>>1,DCOND_HI>>1); \ + emith_flg_hint = (_mv & BITMASK1(cond >> 1) ? _FHV : 0); \ + emith_flg_hint |= (_mc & BITMASK1(cond >> 1) ? _FHC : 0); \ +} while (0) + +// store minimal cc information: rd, rt^rs, carry +// NB: the result *must* first go to FNZ, in case rd == rs or rd == rt. +// NB: for adcf and sbcf, carry-in must be dealt with separately (see there) +static void emith_set_arith_flags(int rd, int rs, int rt, s32 imm, int sub) +{ + if (emith_flg_hint & _FHC) { + if (sub) // C = sub:rt Z0) // Nt^Ns in FV, bit 31 + EMIT(MIPS_XOR_REG(FV, rs, rt)); + else if (rt == Z0 || imm == 0) + emith_flg_noV = 1; // imm #0 can't overflow + else if ((imm < 0) == !sub) + EMIT(MIPS_NOR_REG(FV, rs, Z0)); + else if ((imm > 0) == !sub) + EMIT(MIPS_XOR_REG(FV, rs, Z0)); + } + // full V = Nd^Nt^Ns^C calculation is deferred until really needed + + if (rd && rd != FNZ) + EMIT(MIPS_MOVE_REG(rd, FNZ)); // N,Z via result value in FNZ + emith_cmp_rs = emith_cmp_rt = -1; +} + +// since MIPS has less-than and compare-branch insns, handle cmp separately by +// storing the involved regs for later use in one of those MIPS insns. +// This works for all conditions but VC/VS, but this is fortunately never used. 
+static void emith_set_compare_flags(int rs, int rt, s32 imm) +{ + emith_cmp_rt = rt; + emith_cmp_rs = rs; + emith_cmp_imm = imm; +} + +// data processing, register +#define emith_move_r_r_ptr(d, s) \ + EMIT(MIPS_MOVE_REG(d, s)) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) + +#define emith_move_r_r(d, s) \ + emith_move_r_r_ptr(d, s) +#define emith_move_r_r_c(cond, d, s) \ + emith_move_r_r(d, s) + +#define emith_mvn_r_r(d, s) \ + EMIT(MIPS_MVN_REG(d, s)) + +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OP_REG(FN_PADDU,_, d, s1, AT)); \ + } else EMIT(MIPS_OP_REG(FN_PADDU,_, d, s1, s2)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(d, s1, AT)); \ + } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ +} while (0) + +#define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(d, s1, AT)); \ + } else EMIT(MIPS_ADD_REG(d, s1, s2)); \ +} while (0) + +#define emith_addf_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OP_REG(FN_PADDU,_, FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(MIPS_OP_REG(FN_PADDU,_, FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) +#define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(MIPS_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_addf_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(MIPS_ADD_REG(FNZ, s1, s2)); \ + 
emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_sub_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_SUB_REG(d, s1, AT)); \ + } else EMIT(MIPS_SUB_REG(d, s1, s2)); \ +} while (0) + +#define emith_subf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_SUB_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 1); \ + } else { \ + EMIT(MIPS_SUB_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + } \ +} while (0) + +#define emith_or_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_OR_REG(d, s1, AT)); \ + } else EMIT(MIPS_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_OR_REG(d, s1, AT)); \ + } else EMIT(MIPS_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_XOR_REG(d, s1, AT)); \ + } else EMIT(MIPS_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSR_IMM(AT, s2, simm)); \ + EMIT(MIPS_XOR_REG(d, s1, AT)); \ + } else EMIT(MIPS_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_and_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(MIPS_LSL_IMM(AT, s2, simm)); \ + EMIT(MIPS_AND_REG(d, s1, AT)); \ + } else EMIT(MIPS_AND_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_eor_r_r_lsl(d, s, lslimm) \ + emith_eor_r_r_r_lsl(d, d, s, lslimm) +#define emith_eor_r_r_lsr(d, s, lsrimm) \ + emith_eor_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_add_r_r_r_ptr(d, s1, s2) \ + emith_add_r_r_r_lsl_ptr(d, s1, s2, 0) +#define emith_add_r_r_r(d, s1, 
s2) \ + emith_add_r_r_r_lsl(d, s1, s2, 0) + +#define emith_addf_r_r_r_ptr(d, s1, s2) \ + emith_addf_r_r_r_lsl_ptr(d, s1, s2, 0) +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sub_r_r_r(d, s1, s2) \ + emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_or_r_r_r(d, s1, s2) \ + emith_or_r_r_r_lsl(d, s1, s2, 0) + +#define emith_eor_r_r_r(d, s1, s2) \ + emith_eor_r_r_r_lsl(d, s1, s2, 0) + +#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r_lsl_ptr(d, d, s, 0) +#define emith_add_r_r(d, s) \ + emith_add_r_r_r(d, d, s) + +#define emith_sub_r_r(d, s) \ + emith_sub_r_r_r(d, d, s) + +#define emith_neg_r_r(d, s) \ + EMIT(MIPS_NEG_REG(d, s)) + +#define emith_adc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_add_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_sbc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_sub_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_adc_r_r(d, s) \ + emith_adc_r_r_r(d, d, s) + +#define emith_negc_r_r(d, s) \ + emith_sbc_r_r_r(d, Z0, s) + +// NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW) +// moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't cause another Cout +#define emith_adcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ + emith_add_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_sbcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ + emith_sub_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_and_r_r(d, s) \ + emith_and_r_r_r(d, d, s) +#define emith_and_r_r_c(cond, d, s) \ + emith_and_r_r(d, s) + +#define emith_or_r_r(d, s) \ + 
emith_or_r_r_r(d, d, s) + +#define emith_eor_r_r(d, s) \ + emith_eor_r_r_r(d, d, s) + +#define emith_tst_r_r_ptr(d, s) do { \ + if (d != s) { \ + emith_and_r_r_r(FNZ, d, s); \ + emith_cmp_rs = emith_cmp_rt = -1; \ + } else emith_cmp_rs = s, emith_cmp_rt = Z0; \ +} while (0) +#define emith_tst_r_r(d, s) \ + emith_tst_r_r_ptr(d, s) + +#define emith_teq_r_r(d, s) do { \ + emith_eor_r_r_r(FNZ, d, s); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_cmp_r_r(d, s) \ + emith_set_compare_flags(d, s, 0) +// emith_subf_r_r_r(FNZ, d, s) + +#define emith_addf_r_r(d, s) \ + emith_addf_r_r_r(d, d, s) + +#define emith_subf_r_r(d, s) \ + emith_subf_r_r_r(d, d, s) + +#define emith_adcf_r_r(d, s) \ + emith_adcf_r_r_r(d, d, s) + +#define emith_sbcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, d, s) + +#define emith_negcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, Z0, s) + + +// move immediate +#define MAX_HOST_LITERALS 32 // pool must be smaller than 32 KB +static uintptr_t literal_pool[MAX_HOST_LITERALS]; +static u32 *literal_insn[MAX_HOST_LITERALS]; +static int literal_pindex, literal_iindex; + +static inline int emith_pool_literal(uintptr_t imm) +{ + int idx = literal_pindex - 8; // max look behind in pool + // see if one of the last literals was the same + for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++) + if (imm == literal_pool[idx]) + break; + if (idx == literal_pindex) // store new literal + literal_pool[literal_pindex++] = imm; + return idx; +} + +static void emith_pool_commit(int jumpover) +{ + int i, sz = literal_pindex * sizeof(uintptr_t); + u8 *pool = (u8 *)tcache_ptr; + + // nothing to commit if pool is empty + if (sz == 0) + return; + // align pool to pointer size + if (jumpover) + pool += sizeof(u32); + i = (uintptr_t)pool & (sizeof(void *)-1); + pool += (i ? 
sizeof(void *)-i : 0); + // need branch over pool if not at block end + if (jumpover) + emith_branch(MIPS_B(sz + (pool-(u8 *)tcache_ptr))); + emith_flush(); + // safety check - pool must be after insns and reachable + if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0x7fff) { + elprintf(EL_STATUS|EL_SVP|EL_ANOMALY, + "pool offset out of range"); + exit(1); + } + // copy pool and adjust addresses in insns accessing the pool + memcpy(pool, literal_pool, sz); + for (i = 0; i < literal_iindex; i++) { + u32 *pi = literal_insn[i]; + *pi = (*pi & 0xffff0000) | (u16)(*pi + ((u8 *)pool - (u8 *)pi)); + } + // count pool constants as insns for statistics + for (i = 0; i < literal_pindex * sizeof(uintptr_t)/sizeof(u32); i++) + COUNT_OP; + + tcache_ptr = (void *)((u8 *)pool + sz); + literal_pindex = literal_iindex = 0; +} + +static void emith_pool_check(void) +{ + // check if pool must be committed + if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex && + (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0x7000)) + // pool full, or displacement is approaching the limit + emith_pool_commit(1); +} + +static void emith_move_imm(int r, uintptr_t imm) +{ + if ((s16)imm == imm) { + EMIT(MIPS_ADD_IMM(r, Z0, imm)); + } else if (!((u32)imm >> 16)) { + EMIT(MIPS_OR_IMM(r, Z0, imm)); + } else { + int s = Z0; + if ((u32)imm >> 16) { + EMIT(MIPS_MOVT_IMM(r, (u32)imm >> 16)); + s = r; + } + if ((u16)imm) + EMIT(MIPS_OR_IMM(r, s, (u16)imm)); + } +} +static void emith_move_ptr_imm(int r, uintptr_t imm) +{ +#if _MIPS_SZPTR == 64 + uintptr_t offs = (u8 *)imm - (u8 *)tcache_ptr - 8; + if ((s32)imm != imm && (s32)offs == offs) { + // PC relative + emith_flush(); // next insn must not change its position at all + EMIT_PTR(tcache_ptr, MIPS_BCONDZ(MIPS_BLTL, Z0, 0)); // loads PC+8 into LR + emith_move_imm(r, offs); + emith_add_r_r_r_ptr(r, LR, r); + } else if ((s32)imm != imm) { + // via literal pool + int idx; + if (literal_iindex >= MAX_HOST_LITERALS) + emith_pool_commit(1); + idx = 
emith_pool_literal(imm); + emith_flush(); // next 2 must not change their position at all + EMIT_PTR(tcache_ptr, MIPS_BCONDZ(MIPS_BLTL, Z0, 0)); // loads PC+8 into LR + literal_insn[literal_iindex++] = (u32 *)tcache_ptr; + EMIT_PTR(tcache_ptr, MIPS_OP_IMM(OP_LP, r, LR, idx*sizeof(uintptr_t) - 4)); + } else +#endif + emith_move_imm(r, imm); +} + +#define emith_move_r_ptr_imm(r, imm) \ + emith_move_ptr_imm(r, (uintptr_t)(imm)) + +#define emith_move_r_imm(r, imm) \ + emith_move_imm(r, (s32)(imm)) +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm) + +#define emith_move_r_imm_s8_patchable(r, imm) \ + EMIT(MIPS_ADD_IMM(r, Z0, (s8)(imm))) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u32 *ptr_ = (u32 *)ptr; \ + while (*ptr_ >> 26 != OP_ADDIU) ptr_++; \ + EMIT_PTR(ptr_, (*ptr_ & 0xffff0000) | (u16)(s8)(imm)); \ +} while (0) + +// arithmetic, immediate - can only be ADDI[U], since SUBI[U] doesn't exist +static void emith_add_imm(int ptr, int rd, int rs, u32 imm) +{ + if ((s16)imm == imm) { + if (imm || rd != rs) + EMIT(MIPS_OP_IMM(ptr ? OP_PADDIU:OP_ADDIU, rd,rs,imm)); + } else if ((s32)imm < 0) { + emith_move_r_imm(AT, -imm); + EMIT(MIPS_OP_REG((ptr ? FN_PSUBU:FN_SUBU),_, rd,rs,AT)); + } else { + emith_move_r_imm(AT, imm); + EMIT(MIPS_OP_REG((ptr ? 
FN_PADDU:FN_ADDU),_, rd,rs,AT)); + } +} + +#define emith_add_r_imm(r, imm) \ + emith_add_r_r_imm(r, r, imm) +#define emith_add_r_imm_c(cond, r, imm) \ + emith_add_r_imm(r, imm) + +// in-place flag-setting add, r = r + imm; emith_addf_r_r_imm takes (d, s, imm), so r is passed as both destination and source like the sibling *_r_imm wrappers +#define emith_addf_r_imm(r, imm) \ + emith_addf_r_r_imm(r, r, imm) + +#define emith_sub_r_imm(r, imm) \ + emith_sub_r_r_imm(r, r, imm) +#define emith_sub_r_imm_c(cond, r, imm) \ + emith_sub_r_imm(r, imm) + +#define emith_subf_r_imm(r, imm) \ + emith_subf_r_r_imm(r, r, imm) + +#define emith_adc_r_imm(r, imm) \ + emith_adc_r_r_imm(r, r, imm) + +#define emith_adcf_r_imm(r, imm) \ + emith_adcf_r_r_imm(r, r, imm) + +#define emith_cmp_r_imm(r, imm) \ + emith_set_compare_flags(r, -1, imm) +// emith_subf_r_r_imm(FNZ, r, (s16)imm) + +#define emith_add_r_r_ptr_imm(d, s, imm) \ + emith_add_imm(1, d, s, imm) + +#define emith_add_r_r_imm(d, s, imm) \ + emith_add_imm(0, d, s, imm) + +#define emith_addf_r_r_imm(d, s, imm) do { \ + emith_add_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, -1, imm, 0); \ +} while (0) + +#define emith_adc_r_r_imm(d, s, imm) do { \ + emith_add_r_r_r(AT, s, FC); \ + emith_add_r_r_imm(d, AT, imm); \ +} while (0) + +#define emith_adcf_r_r_imm(d, s, imm) do { \ + if (imm == 0) { \ + emith_add_r_r_r(FNZ, s, FC); \ + emith_set_arith_flags(d, s, -1, 1, 0); \ + } else { \ + emith_add_r_r_r(FNZ, s, FC); \ + EMIT(MIPS_SLTU_REG(AT, FNZ, FC)); \ + emith_add_r_r_imm(FNZ, FNZ, imm); \ + emith_set_arith_flags(d, s, -1, imm, 0); \ + emith_or_r_r(FC, AT); \ + } \ +} while (0) + +// NB: no SUBI in MIPS II, since ADDI takes a signed imm +#define emith_sub_r_r_imm(d, s, imm) \ + emith_add_r_r_imm(d, s, -(imm)) +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_sub_r_r_imm(d, s, imm) + +#define emith_subf_r_r_imm(d, s, imm) do { \ + emith_sub_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, -1, imm, 1); \ +} while (0) + +// logical, immediate +static void emith_log_imm(int op, int rd, int rs, u32 imm) +{ + if (imm >> 16) { + emith_move_r_imm(AT, imm); + EMIT(MIPS_OP_REG(FN_AND + 
(op-OP_ANDI),_, rd, rs, AT)); + } else if (op == OP_ANDI || imm || rd != rs) + EMIT(MIPS_OP_IMM(op, rd, rs, imm)); +} + +#define emith_and_r_imm(r, imm) \ + emith_log_imm(OP_ANDI, r, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_log_imm(OP_ORI, r, r, imm) +#define emith_or_r_imm_c(cond, r, imm) \ + emith_or_r_imm(r, imm) + +#define emith_eor_r_imm_ptr(r, imm) \ + emith_log_imm(OP_XORI, r, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_ptr(r, imm) + +#define emith_eor_r_imm(r, imm) \ + emith_eor_r_imm_ptr(r, imm) +#define emith_eor_r_imm_c(cond, r, imm) \ + emith_eor_r_imm(r, imm) + +/* NB: BIC #imm not available in MIPS; use AND #~imm instead */ +#define emith_bic_r_imm(r, imm) \ + emith_log_imm(OP_ANDI, r, r, ~(imm)) +#define emith_bic_r_imm_c(cond, r, imm) \ + emith_bic_r_imm(r, imm) + +#define emith_tst_r_imm(r, imm) do { \ + emith_log_imm(OP_ANDI, FNZ, r, imm); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_tst_r_imm(r, imm) + +#define emith_and_r_r_imm(d, s, imm) \ + emith_log_imm(OP_ANDI, d, s, imm) + +#define emith_or_r_r_imm(d, s, imm) \ + emith_log_imm(OP_ORI, d, s, imm) + +#define emith_eor_r_r_imm(d, s, imm) \ + emith_log_imm(OP_XORI, d, s, imm) + +// shift +#define emith_lsl(d, s, cnt) \ + EMIT(MIPS_LSL_IMM(d, s, cnt)) + +#define emith_lsr(d, s, cnt) \ + EMIT(MIPS_LSR_IMM(d, s, cnt)) + +#define emith_asr(d, s, cnt) \ + EMIT(MIPS_ASR_IMM(d, s, cnt)) + +#define emith_ror(d, s, cnt) do { \ + if (__mips_isa_rev < 2) { \ + EMIT(MIPS_LSL_IMM(AT, s, 32-(cnt))); \ + EMIT(MIPS_LSR_IMM(d, s, cnt)); \ + EMIT(MIPS_OR_REG(d, d, AT)); \ + } else EMIT(MIPS_ROR_IMM(d, s, cnt)); \ +} while (0) +#define emith_ror_c(cond, d, s, cnt) \ + emith_ror(d, s, cnt) + +#define emith_rol(d, s, cnt) do { \ + if (__mips_isa_rev < 2) { \ + EMIT(MIPS_LSR_IMM(AT, s, 32-(cnt))); \ + EMIT(MIPS_LSL_IMM(d, s, cnt)); \ + EMIT(MIPS_OR_REG(d, d, AT)); \ + } else EMIT(MIPS_ROR_IMM(d, s, 32-(cnt))); \ +} 
while (0) + +#define emith_rorc(d) do { \ + emith_lsr(d, d, 1); \ + emith_lsl(AT, FC, 31); \ + emith_or_r_r(d, AT); \ +} while (0) + +#define emith_rolc(d) do { \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ +} while (0) + +// NB: all flag setting shifts make V undefined +#define emith_lslf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsl(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_lsr(FC, _s, 31); \ + emith_lsl(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_lsrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_lsr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_asrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_asr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_asr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rolf(d, s, cnt) do { \ + emith_rol(d, s, cnt); \ + emith_and_r_r_imm(FC, d, 1); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rorf(d, s, cnt) do { \ + emith_ror(d, s, cnt); \ + emith_lsr(FC, d, 31); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rolcf(d) do { \ + emith_lsr(AT, d, 31); \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rorcf(d) do { \ + emith_and_r_r_imm(AT, d, 1); \ + emith_lsr(d, d, 1); \ + emith_lsl(FC, FC, 31); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +// signed/unsigned extend +#define 
emith_clear_msb(d, s, count) /* bits to clear */ do { \ + u32 t; \ + if (__mips_isa_rev >= 2) \ + EMIT(MIPS_EXT_IMM(d, s, 0, 32-(count))); \ + else if ((count) >= 16) { \ + t = (count) - 16; \ + t = 0xffff >> t; \ + emith_and_r_r_imm(d, s, t); \ + } else { \ + emith_lsl(d, s, count); \ + emith_lsr(d, d, count); \ + } \ +} while (0) +#define emith_clear_msb_c(cond, d, s, count) \ + emith_clear_msb(d, s, count) + +#define emith_sext(d, s, count) /* bits to keep */ do { \ + if (__mips_isa_rev >= 2 && (count) == 8) \ + EMIT(MIPS_SEB_REG(d, s)); \ + else if (__mips_isa_rev >= 2 && (count) == 16) \ + EMIT(MIPS_SEH_REG(d, s)); \ + else { \ + emith_lsl(d, s, 32-(count)); \ + emith_asr(d, d, 32-(count)); \ + } \ +} while (0) + +// multiply Rd = Rn*Rm (+ Ra); NB: next 2 insns after MFLO/MFHI mustn't be MULT +static u8 *last_lohi; +// pad with NOPs until at least 8 bytes (2 insns) lie between last_lohi and the emit position +static void emith_lohi_nops(void) +{ + u32 d; + // d is unsigned: if tcache_ptr is below last_lohi the difference wraps to a large value, which already fails the < 8 test + while ((d = (u8 *)tcache_ptr - last_lohi) < 8) EMIT(MIPS_NOP); +} + +#define emith_mul(d, s1, s2) do { \ + emith_lohi_nops(); \ + EMIT(MIPS_MULTU(s1, s2)); \ + EMIT(MIPS_MFLO(d)); \ + last_lohi = (u8 *)tcache_ptr; \ +} while (0) + +#define emith_mul_u64(dlo, dhi, s1, s2) do { \ + emith_lohi_nops(); \ + EMIT(MIPS_MULTU(s1, s2)); \ + EMIT(MIPS_MFLO(dlo)); \ + EMIT(MIPS_MFHI(dhi)); \ + last_lohi = (u8 *)tcache_ptr; \ +} while (0) + +#define emith_mul_s64(dlo, dhi, s1, s2) do { \ + emith_lohi_nops(); \ + EMIT(MIPS_MULT(s1, s2)); \ + EMIT(MIPS_MFLO(dlo)); \ + EMIT(MIPS_MFHI(dhi)); \ + last_lohi = (u8 *)tcache_ptr; \ +} while (0) + +#define emith_mula_s64(dlo, dhi, s1, s2) do { \ + int t_ = rcache_get_tmp(); \ + emith_lohi_nops(); \ + EMIT(MIPS_MULT(s1, s2)); \ + EMIT(MIPS_MFLO(AT)); \ + EMIT(MIPS_MFHI(t_)); \ + last_lohi = (u8 *)tcache_ptr; \ + emith_add_r_r(dlo, AT); \ + EMIT(MIPS_SLTU_REG(AT, dlo, AT)); \ + emith_add_r_r(dhi, AT); \ + emith_add_r_r(dhi, t_); \ + rcache_free_tmp(t_); \ +} while (0) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + emith_mula_s64(dlo, dhi, s1, s2) + +// 
load/store. offs has 16 bits signed, which is currently sufficient +#define emith_read_r_r_offs_ptr(r, rs, offs) \ + EMIT(MIPS_OP_IMM(OP_LP, r, rs, offs)) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) + +#define emith_read_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LW(r, rs, offs)) +#define emith_read_r_r_offs_c(cond, r, rs, offs) \ + emith_read_r_r_offs(r, rs, offs) + +#define emith_read_r_r_r_ptr(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + EMIT(MIPS_OP_IMM(OP_LP, r, AT, 0)); \ +} while (0) + +#define emith_read_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + EMIT(MIPS_LW(r, AT, 0)); \ +} while (0) +#define emith_read_r_r_r_c(cond, r, rs, rm) \ + emith_read_r_r_r(r, rs, rm) + +#define emith_read8_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LBU(r, rs, offs)) +#define emith_read8_r_r_offs_c(cond, r, rs, offs) \ + emith_read8_r_r_offs(r, rs, offs) + +#define emith_read8_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + EMIT(MIPS_LBU(r, AT, 0)); \ +} while (0) +#define emith_read8_r_r_r_c(cond, r, rs, rm) \ + emith_read8_r_r_r(r, rs, rm) + +#define emith_read16_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LHU(r, rs, offs)) +#define emith_read16_r_r_offs_c(cond, r, rs, offs) \ + emith_read16_r_r_offs(r, rs, offs) + +#define emith_read16_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + EMIT(MIPS_LHU(r, AT, 0)); \ +} while (0) +#define emith_read16_r_r_r_c(cond, r, rs, rm) \ + emith_read16_r_r_r(r, rs, rm) + +#define emith_read8s_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LB(r, rs, offs)) +#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \ + emith_read8s_r_r_offs(r, rs, offs) + +#define emith_read8s_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + EMIT(MIPS_LB(r, AT, 0)); \ +} while (0) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + emith_read8s_r_r_r(r, rs, rm) + +#define emith_read16s_r_r_offs(r, rs, offs) \ + EMIT(MIPS_LH(r, rs, offs)) +#define emith_read16s_r_r_offs_c(cond, 
r, rs, offs) \ + emith_read16s_r_r_offs(r, rs, offs) + +#define emith_read16s_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + EMIT(MIPS_LH(r, AT, 0)); \ +} while (0) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + emith_read16s_r_r_r(r, rs, rm) + + +#define emith_write_r_r_offs_ptr(r, rs, offs) \ + EMIT(MIPS_OP_IMM(OP_SP, r, rs, offs)) +#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_write_r_r_offs_ptr(r, rs, offs) + +#define emith_write_r_r_r_ptr(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + EMIT(MIPS_OP_IMM(OP_SP, r, AT, 0)); \ +} while (0) +#define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \ + emith_write_r_r_r_ptr(r, rs, rm) + +#define emith_write_r_r_offs(r, rs, offs) \ + EMIT(MIPS_SW(r, rs, offs)) +#define emith_write_r_r_offs_c(cond, r, rs, offs) \ + emith_write_r_r_offs(r, rs, offs) + +#define emith_write_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + EMIT(MIPS_SW(r, AT, 0)); \ +} while (0) +#define emith_write_r_r_r_c(cond, r, rs, rm) \ + emith_write_r_r_r(r, rs, rm) + +#define emith_ctx_read_ptr(r, offs) \ + emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_read(r, offs) \ + emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) + +#define emith_ctx_write_ptr(r, offs) \ + emith_write_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_write(r, offs) \ + emith_write_r_r_offs(r, CONTEXT_REG, offs) + +#define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_read(r_, offs_); \ +} while (0) + +#define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_write(r_, offs_); \ +} while (0) + +// function call handling +#define emith_save_caller_regs(mask) do { \ + int _c; u32 _m = mask & 0x300fffc; /* r2-r15,r24-r25 */ \ + if 
(__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align */ \ + int _s = count_bits(_m) * 4, _o = _s; \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= 4; if (_c) emith_write_r_r_offs(_c, SP, _o); } \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + int _c; u32 _m = mask & 0x300fffc; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; \ + int _s = count_bits(_m) * 4, _o = 0; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs(_c, SP, _o); _o += 4; } \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ +} while (0) + +#define host_call(addr, args) \ + addr + +#define host_arg2reg(rd, arg) \ + rd = (arg+4) + +#define emith_pass_arg_r(arg, reg) \ + emith_move_r_r_ptr(arg, reg) + +#define emith_pass_arg_imm(arg, imm) \ + emith_move_r_ptr_imm(arg, imm) + +// branching +#define emith_invert_branch(cond) /* inverted conditional branch */ \ + (((cond) >> 5) == OP__RT ? 
(cond) ^ 0x01 : (cond) ^ 0x20) + +// evaluate the emulated condition, returns a register/branch type pair +static int emith_cmpr_check(int rs, int rt, int cond, int *r) +{ + int b = 0; + + // condition check for comparing 2 registers + switch (cond) { + case DCOND_EQ: *r = rs; b = MIPS_BEQ|rt; break; + case DCOND_NE: *r = rs; b = MIPS_BNE|rt; break; + case DCOND_LO: EMIT(MIPS_SLTU_REG(AT, rs, rt)); + *r = AT, b = MIPS_BNE; break; // s < t unsigned + case DCOND_HS: EMIT(MIPS_SLTU_REG(AT, rs, rt)); + *r = AT, b = MIPS_BEQ; break; // s >= t unsigned + case DCOND_LS: EMIT(MIPS_SLTU_REG(AT, rt, rs)); + *r = AT, b = MIPS_BEQ; break; // s <= t unsigned + case DCOND_HI: EMIT(MIPS_SLTU_REG(AT, rt, rs)); + *r = AT, b = MIPS_BNE; break; // s > t unsigned + case DCOND_LT: if (rt == 0) { *r = rs, b = MIPS_BLT; break; } // s < 0 + EMIT(MIPS_SLT_REG(AT, rs, rt)); + *r = AT, b = MIPS_BNE; break; // s < t + case DCOND_GE: if (rt == 0) { *r = rs, b = MIPS_BGE; break; } // s >= 0 + EMIT(MIPS_SLT_REG(AT, rs, rt)); + *r = AT, b = MIPS_BEQ; break; // s >= t + case DCOND_LE: if (rt == 0) { *r = rs, b = MIPS_BLE; break; } // s <= 0 + EMIT(MIPS_SLT_REG(AT, rt, rs)); + *r = AT, b = MIPS_BEQ; break; // s <= t + case DCOND_GT: if (rt == 0) { *r = rs, b = MIPS_BGT; break; } // s > 0 + EMIT(MIPS_SLT_REG(AT, rt, rs)); + *r = AT, b = MIPS_BNE; break; // s > t + } + + return b; +} + +static int emith_cmpi_check(int rs, s32 imm, int cond, int *r) +{ + int b = 0; + + // condition check for comparing register with immediate + if (imm == 0) return emith_cmpr_check(rs, Z0, cond, r); + switch (cond) { + case DCOND_EQ: emith_move_r_imm(AT, imm); + *r = rs; b = MIPS_BEQ|AT; break; + case DCOND_NE: emith_move_r_imm(AT, imm); + *r = rs; b = MIPS_BNE|AT; break; + case DCOND_LO: EMIT(MIPS_SLTU_IMM(AT, rs, imm)); + *r = AT, b = MIPS_BNE; break; // s < imm unsigned + case DCOND_HS: EMIT(MIPS_SLTU_IMM(AT, rs, imm)); + *r = AT, b = MIPS_BEQ; break; // s >= imm unsigned + case DCOND_LS: emith_move_r_imm(AT, imm); 
+ EMIT(MIPS_SLTU_REG(AT, AT, rs)); + *r = AT, b = MIPS_BEQ; break; // s <= imm unsigned + case DCOND_HI: emith_move_r_imm(AT, imm); + EMIT(MIPS_SLTU_REG(AT, AT, rs)); + *r = AT, b = MIPS_BNE; break; // s > imm unsigned + case DCOND_LT: EMIT(MIPS_SLT_IMM(AT, rs, imm)); + *r = AT, b = MIPS_BNE; break; // s < imm + case DCOND_GE: EMIT(MIPS_SLT_IMM(AT, rs, imm)); + *r = AT, b = MIPS_BEQ; break; // s >= imm + case DCOND_LE: emith_move_r_imm(AT, imm); + EMIT(MIPS_SLT_REG(AT, AT, rs)); + *r = AT, b = MIPS_BEQ; break; // s <= imm + case DCOND_GT: emith_move_r_imm(AT, imm); + EMIT(MIPS_SLT_REG(AT, AT, rs)); + *r = AT, b = MIPS_BNE; break; // s > imm + } + return b; +} + +static int emith_cond_check(int cond, int *r) +{ + int b = 0; + + if (emith_cmp_rs >= 0) { + if (emith_cmp_rt != -1) + b = emith_cmpr_check(emith_cmp_rs,emith_cmp_rt, cond,r); + else b = emith_cmpi_check(emith_cmp_rs,emith_cmp_imm,cond,r); + } + + // shortcut for V known to be 0 + if (!b && emith_flg_noV) switch (cond) { + case DCOND_VS: *r = Z0; b = MIPS_BNE; break; // never + case DCOND_VC: *r = Z0; b = MIPS_BEQ; break; // always + case DCOND_LT: *r = FNZ, b = MIPS_BLT; break; // N + case DCOND_GE: *r = FNZ, b = MIPS_BGE; break; // !N + case DCOND_LE: *r = FNZ, b = MIPS_BLE; break; // N || Z + case DCOND_GT: *r = FNZ, b = MIPS_BGT; break; // !N && !Z + } + + // the full monty if no shortcut + if (!b) switch (cond) { + // conditions using NZ + case DCOND_EQ: *r = FNZ; b = MIPS_BEQ; break; // Z + case DCOND_NE: *r = FNZ; b = MIPS_BNE; break; // !Z + case DCOND_MI: *r = FNZ; b = MIPS_BLT; break; // N + case DCOND_PL: *r = FNZ; b = MIPS_BGE; break; // !N + // conditions using C + case DCOND_LO: *r = FC; b = MIPS_BNE; break; // C + case DCOND_HS: *r = FC; b = MIPS_BEQ; break; // !C + // conditions using CZ + case DCOND_LS: // C || Z + case DCOND_HI: // !C && !Z + EMIT(MIPS_ADD_IMM(AT, FC, -1)); // !C && !Z + EMIT(MIPS_AND_REG(AT, FNZ, AT)); + *r = AT, b = (cond == DCOND_HI ? 
MIPS_BNE : MIPS_BEQ); + break; + + // conditions using V + case DCOND_VS: // V + case DCOND_VC: // !V + EMIT(MIPS_XOR_REG(AT, FV, FNZ)); // V = Nt^Ns^Nd^C + EMIT(MIPS_LSR_IMM(AT, AT, 31)); + EMIT(MIPS_XOR_REG(AT, AT, FC)); + *r = AT, b = (cond == DCOND_VS ? MIPS_BNE : MIPS_BEQ); + break; + // conditions using VNZ + case DCOND_LT: // N^V + case DCOND_GE: // !(N^V) + EMIT(MIPS_LSR_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(MIPS_XOR_REG(AT, FC, AT)); + *r = AT, b = (cond == DCOND_LT ? MIPS_BNE : MIPS_BEQ); + break; + case DCOND_LE: // (N^V) || Z + case DCOND_GT: // !(N^V) && !Z + EMIT(MIPS_LSR_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(MIPS_XOR_REG(AT, FC, AT)); + EMIT(MIPS_ADD_IMM(AT, AT, -1)); // !(Nd^V) && !Z + EMIT(MIPS_AND_REG(AT, FNZ, AT)); + *r = AT, b = (cond == DCOND_GT ? MIPS_BNE : MIPS_BEQ); + break; + } + return b; +} + +// NB: assumes all targets are in the same 256MB segment +#define emith_jump(target) \ + emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)) +#define emith_jump_patchable(target) \ + emith_jump(target) + +// NB: MIPS conditional branches have only +/- 128KB range +#define emith_jump_cond(cond, target) do { \ + int r_, mcond_ = emith_cond_check(cond, &r_); \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr - 4; \ + emith_branch(MIPS_BCONDZ(mcond_,r_,disp_ & 0x0003ffff)); \ +} while (0) +#define emith_jump_cond_patchable(cond, target) \ + emith_jump_cond(cond, target) + +#define emith_jump_cond_inrange(target) \ + ((u8 *)target - (u8 *)tcache_ptr - 4 < 0x20000 && \ + (u8 *)target - (u8 *)tcache_ptr - 4 >= -0x20000+0x10) //mind cond_check + +// NB: returns position of patch for cache maintenance +#define emith_jump_patch(ptr, target, pos) do { \ + u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \ + u32 disp_, mask_; \ + while (!emith_is_j(*ptr_) && !emith_is_b(*ptr_)) ptr_ ++; \ + if (emith_is_b(*ptr_)) \ + mask_ = 0xffff0000, disp_ = (u8 *)target - (u8 *)ptr_ - 4; \ + else mask_ = 0xfc000000, disp_ = (uintptr_t)target; \ + 
EMIT_PTR(ptr_, (*ptr_ & mask_) | ((disp_ >> 2) & ~mask_)); \ + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ +} while (0) + +#define emith_jump_patch_inrange(ptr, target) \ + ((u8 *)target - (u8 *)ptr - 4 < 0x20000 && \ + (u8 *)target - (u8 *)ptr - 4 >= -0x20000+0x10) // mind cond_check +#define emith_jump_patch_size() 4 + +#define emith_jump_at(ptr, target) do { \ + u32 *ptr_ = (u32 *)ptr; \ + EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \ + EMIT_PTR(ptr_, MIPS_NOP); \ +} while (0) +#define emith_jump_at_size() 8 + +#define emith_jump_reg(r) \ + emith_branch(MIPS_JR(r)) +#define emith_jump_reg_c(cond, r) \ + emith_jump_reg(r) + +#define emith_jump_ctx(offs) do { \ + emith_ctx_read_ptr(CR, offs); \ + emith_jump_reg(CR); \ +} while (0) +#define emith_jump_ctx_c(cond, offs) \ + emith_jump_ctx(offs) + +#define emith_call(target) \ + emith_branch(MIPS_JAL((uintptr_t)target & 0x0fffffff)) +#define emith_call_cond(cond, target) \ + emith_call(target) + +#define emith_call_reg(r) \ + emith_branch(MIPS_JALR(LR, r)) +#define emith_abicall_ctx(offs) do { \ + emith_ctx_read_ptr(CR, offs); \ + emith_call_reg(CR); \ +} while (0) + +#define emith_abijump_reg(r) do { \ + if ((r) != CR) emith_move_r_r(CR, r); \ + emith_branch(MIPS_JR(CR)); \ +} while (0) +#define emith_abijump_reg_c(cond, r) \ + emith_abijump_reg(r) +#define emith_abicall(target) do { \ + emith_move_r_ptr_imm(CR, target); \ + emith_branch(MIPS_JALR(LR, CR)); \ +} while (0) +#define emith_abicall_cond(cond, target) \ + emith_abicall(target) +#define emith_abicall_reg(r) do { \ + if ((r) != CR) emith_move_r_r(CR, r); \ + emith_branch(MIPS_JALR(LR, CR)); \ +} while (0) + +#define emith_call_cleanup() /**/ + +#define emith_ret() \ + emith_branch(MIPS_JR(LR)) +#define emith_ret_c(cond) \ + emith_ret() + +#define emith_ret_to_ctx(offs) \ + emith_ctx_write_ptr(LR, offs) + +#define emith_add_r_ret(r) \ + emith_add_r_r_ptr(r, LR) + +// NB: ABI SP alignment is 8 for 64 bit, O32 has a 16 byte arg 
save area +#define emith_push_ret(r) do { \ + int offs_ = 8+16 - 2*PTR_SIZE; \ + emith_add_r_r_ptr_imm(SP, SP, -8-16); \ + emith_write_r_r_offs_ptr(LR, SP, offs_ + PTR_SIZE); \ + if ((r) > 0) emith_write_r_r_offs(r, SP, offs_); \ +} while (0) + +#define emith_pop_and_ret(r) do { \ + int offs_ = 8+16 - 2*PTR_SIZE; \ + if ((r) > 0) emith_read_r_r_offs(r, SP, offs_); \ + emith_read_r_r_offs_ptr(LR, SP, offs_ + PTR_SIZE); \ + emith_add_r_r_ptr_imm(SP, SP, 8+16); \ + emith_ret(); \ +} while (0) + + +// emitter ABI stuff +#define emith_update_cache() /**/ +#define emith_rw_offs_max() 0x7fff +#define emith_uext_ptr(r) /**/ + +#if __mips_isa_rev >= 2 && defined(MIPS_USE_SYNCI) && defined(__GNUC__) +// this should normally be in libc clear_cache; however, it sometimes isn't. +// core function taken from SYNCI description, MIPS32 instruction set manual +static NOINLINE void host_instructions_updated(void *base, void *end, int force) +{ + int step, tmp; + asm volatile( + " rdhwr %2, $1;" + " bal 0f;" // needed to allow for jr.hb: +#if _MIPS_SZPTR == 64 + "0: daddiu $ra, $ra, 3f-0b;" // set ra to insn after jr.hb +#else + "0: addiu $ra, $ra, 3f-0b;" // set ra to insn after jr.hb +#endif + " beqz %2, 3f;" + + "1: synci 0(%0);" + " sltu %3, %0, %1;" +#if _MIPS_SZPTR == 64 + " daddu %0, %0, %2;" +#else + " addu %0, %0, %2;" +#endif + " bnez %3, 1b;" + + " sync;" + "2: jr.hb $ra;" + "3: " : "+r"(base), "+r"(end), "=r"(step), "=r"(tmp) :: "$31"); +} +#else +#define host_instructions_updated(base, end, force) __builtin___clear_cache(base, end) +#endif + +// SH2 drc specific +#define emith_sh2_drc_entry() do { \ + int _c, _z = PTR_SIZE; u32 _m = 0xd0ff0000; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 8 */ \ + int _s = count_bits(_m) * _z + 16, _o = _s; /* 16 O32 arg save area */ \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= _z; if (_c) 
emith_write_r_r_offs_ptr(_c, SP, _o); } \ +} while (0) +#define emith_sh2_drc_exit() do { \ + int _c, _z = PTR_SIZE; u32 _m = 0xd0ff0000; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; \ + int _s = count_bits(_m) * _z + 16, _o = 16; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ + emith_ret(); \ +} while (0) + +// NB: assumes a is in arg0, tab, func and mask are temp +#define emith_sh2_rcall(a, tab, func, mask) do { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + emith_add_r_r_r_lsl_ptr(tab, tab, mask, PTR_SCALE+1); \ + emith_read_r_r_offs_ptr(func, tab, 0); \ + emith_read_r_r_offs(mask, tab, (1 << PTR_SCALE)); \ + emith_addf_r_r_r_ptr(func, func, func); \ +} while (0) + +// NB: assumes a, val are in arg0 and arg1, tab and func are temp +#define emith_sh2_wcall(a, val, tab, func) do { \ + emith_lsr(func, a, SH2_WRITE_SHIFT); \ + emith_lsl(func, func, PTR_SCALE); \ + emith_read_r_r_r_ptr(CR, tab, func); \ + emith_move_r_r_ptr(6, CONTEXT_REG); /* arg2 */ \ + emith_abijump_reg(CR); \ +} while (0) + +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_cmp_r_imm(sr, 0); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_HI); \ + 
emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +/* + * T = !carry(Rn = (Rn << 1) | T) + * if Q + * C = carry(Rn += Rm) + * else + * C = carry(Rn -= Rm) + * T ^= C + */ +#define emith_sh2_div1_step(rn, rm, sr) do { \ + int t_ = rcache_get_tmp(); \ + emith_and_r_r_imm(AT, sr, T); \ + emith_lsr(FC, rn, 31); /*Rn = (Rn<<1)+T*/ \ + emith_lsl(t_, rn, 1); \ + emith_or_r_r(t_, AT); \ + emith_or_r_imm(sr, T); /* T = !carry */ \ + emith_eor_r_r(sr, FC); \ + emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ + EMITH_JMP3_START(DCOND_EQ); \ + emith_add_r_r_r(rn, t_, rm); \ + EMIT(MIPS_SLTU_REG(FC, rn, t_)); \ + EMITH_JMP3_MID(DCOND_EQ); \ + emith_sub_r_r_r(rn, t_, rm); \ + EMIT(MIPS_SLTU_REG(FC, t_, rn)); \ + EMITH_JMP3_END(); \ + emith_eor_r_r(sr, FC); /* T ^= carry */ \ + rcache_free_tmp(t_); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH >> 31 to MACH >> 15. 
this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \ + emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_lsr(rn, ml, 31); \ + emith_add_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \ + EMITH_SJMP_START(DCOND_MI); /* sum < 0 -> +ovrfl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +#define emith_write_sr(sr, srcr) do { \ + if (__mips_isa_rev < 2) { \ + emith_lsr(sr, sr , 10); emith_lsl(sr, sr, 10); \ + emith_lsl(AT, srcr, 22); emith_lsr(AT, AT, 22); \ + emith_or_r_r(sr, AT); \ + } else EMIT(MIPS_INS_IMM(sr, srcr, 0, 10)); \ +} while (0) + +#define emith_carry_to_t(sr, is_sub) do { \ + if (__mips_isa_rev < 2) { \ + emith_and_r_imm(sr, 0xfffffffe); \ + emith_or_r_r(sr, FC); \ + } else EMIT(MIPS_INS_IMM(sr, FC, 0, 1)); \ +} while (0) + +#define emith_t_to_carry(sr, is_sub) do { \ + emith_and_r_r_imm(FC, sr, 1); \ +} while (0) + +#define emith_tpop_carry(sr, is_sub) do { \ + emith_and_r_r_imm(FC, sr, 1); \ + emith_eor_r_r(sr, FC); \ +} while (0) + +#define emith_tpush_carry(sr, is_sub) \ + emith_or_r_r(sr, FC) + +#ifdef T +// T bit handling +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +static void emith_clr_t_cond(int sr) +{ + emith_bic_r_imm(sr, T); +} + +static void emith_set_t_cond(int sr, int cond) +{ + int b, r; + u8 *ptr; + u32 val = 0, inv = 0; + + // try to avoid jumping around if possible + if (emith_cmp_rs >= 0) { + if (emith_cmp_rt >= 0) + b = emith_cmpr_check(emith_cmp_rs, emith_cmp_rt, cond, &r); + else + b = emith_cmpi_check(emith_cmp_rs, emith_cmp_imm, cond, &r); + + // XXX this relies on the inner workings of cmp_check... 
+ if (r == AT) + // result of slt check which returns either 0 or 1 in AT + val++, inv = (b == MIPS_BEQ); + } else { + b = emith_cond_check(cond, &r); + if (r == Z0) { + if (b == MIPS_BEQ || b == MIPS_BLE || b == MIPS_BGE) + emith_or_r_imm(sr, T); + return; + } else if (r == FC) + val++, inv = (b == MIPS_BEQ); + } + + if (!val) switch (b) { // cases: b..z r, aka cmp r,Z0 or cmp r,#0 + case MIPS_BEQ: EMIT(MIPS_SLTU_IMM(AT, r, 1)); r=AT; val++; break; + case MIPS_BNE: EMIT(MIPS_SLTU_REG(AT,Z0, r)); r=AT; val++; break; + case MIPS_BLT: EMIT(MIPS_SLT_REG(AT, r, Z0)); r=AT; val++; break; + case MIPS_BGE: EMIT(MIPS_SLT_REG(AT, r, Z0)); r=AT; val++; inv++; break; + case MIPS_BLE: EMIT(MIPS_SLT_REG(AT, Z0, r)); r=AT; val++; inv++; break; + case MIPS_BGT: EMIT(MIPS_SLT_REG(AT, Z0, r)); r=AT; val++; break; + default: // cases: beq/bne r,s, aka cmp r,s + if ((b>>5) == OP_BEQ) { + EMIT(MIPS_XOR_REG(AT, r, b&0x1f)); + EMIT(MIPS_SLTU_IMM(AT,AT, 1)); r=AT; val++; break; + } else if ((b>>5) == OP_BNE) { + EMIT(MIPS_XOR_REG(AT, r, b&0x1f)); + EMIT(MIPS_SLTU_REG(AT,Z0,AT)); r=AT; val++; break; + } + } + if (val) { + emith_or_r_r(sr, r); + if (inv) + emith_eor_r_imm(sr, T); + return; + } + + // can't obtain result directly, use presumably slower jump !cond + or sr,T + b = emith_invert_branch(b); + ptr = emith_branch(MIPS_BCONDZ(b, r, 0)); + emith_or_r_imm(sr, T); + emith_flush(); // prohibit delay slot switching across jump targets + val = (u8 *)tcache_ptr - (u8 *)(ptr) - 4; + EMIT_PTR(ptr, MIPS_BCONDZ(b, r, val & 0x0003ffff)); +} + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? 
DCOND_NE: DCOND_EQ; +} +#endif diff --git a/cpu/drc/emit_ppc.c b/cpu/drc/emit_ppc.c new file mode 100644 index 00000000..620c7c1c --- /dev/null +++ b/cpu/drc/emit_ppc.c @@ -0,0 +1,1844 @@ +/* + * Basic macros to emit PowerISA 2.03 64 bit instructions and some utils + * Copyright (C) 2020-2024 irixxxx + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ + +// NB bit numbers are reversed in PPC (MSB is bit 0). The emith_* functions and +// macros must take this into account. + +// NB PPC was a 64 bit architecture from the outset, so basically all operations +// are operating on 64 bits. 32 bit arch was only added later on, and there are +// very few 32 bit operations (cmp*, shift/rotate, extract/insert, load/store). +// For most operations the upper bits don't spill into the lower word, for the +// others there is an appropriate 32 bit operation available. + +// NB PowerPC isn't a clean RISC design. Several insns use microcode, which is +// AFAIK notably slower than using some 2-3 non-microcode insns. So, using +// such insns should be avoided if possible. Listed in Cell handbook, App. A: +// - shift/rotate having the amount in a register +// - arithmetic/logical having the RC flag set (except cmp*) +// - load/store algebraic (l?a*), multiple (lmw/stmw), string (ls*/sts*) +// - mtcrf (and some more SPR related, not used here) +// moreover, misaligned load/store crossing a cacheline boundary are microcoded. +// Note also that load/store string isn't available in little endian mode. + +// NB flag handling in PPC differs grossly from the ARM/X86 model. There are 8 +// fields in the condition register, each having 4 condition bits. However, only +// the EQ bit is similar to the Z flag. The CA and OV bits in the XER register +// are similar to the C and V bits, but shifts don't use CA, and cmp* doesn't +// use CA and OV. 
+// Moreover, there's no easy possibility to get CA and OV for 32 bit arithmetic +// since all arithmetic/logical insns use 64 bit. +// For now, use the "no flags" code from the RISC-V backend. + +#define HOST_REGS 32 + +// PPC64: params: r3-r10, return: r3, temp: r0,r11-r12, saved: r14-r31 +// reserved: r0(zero), r1(stack), r2(TOC), r13(TID) +// additionally reserved on OSX: r31(PIC), r30(frame), r11(parentframe) +// for OSX PIC code, on function calls r12 must contain the called address +#define TOC_REG 2 +#define RET_REG 3 +#define PARAM_REGS { 3, 4, 5, 6, 7, 8, 9, 10 } +#define PRESERVED_REGS { 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 } +#define TEMPORARY_REGS { 12 } + +#define CONTEXT_REG 29 +#define STATIC_SH2_REGS { SHR_SR,28 , SHR_R(0),27 , SHR_R(1),26 } + +// if RA is 0 in non-update memory insns, ADDI/ADDIS, ISEL, it aliases with zero +#define Z0 0 // zero register +#define SP 1 // stack pointer +#define CR 12 // call register +// SPR registers +#define XER -1 // exception register +#define LR -8 // link register +#define CTR -9 // counter register +// internally used by code emitter: +#define AT 0 // emitter temporary (can't be fully used anyway) +#define FNZ 14 // emulated processor flags: N (bit 31) ,Z (all bits) +#define FC 15 // emulated processor flags: C (bit 0), others 0 +#define FV 16 // emulated processor flags: Nt^Ns (bit 31). 
others x + + +// unified conditions; virtual, not corresponding to anything real on PPC +#define DCOND_EQ 0x0 +#define DCOND_NE 0x1 +#define DCOND_HS 0x2 +#define DCOND_LO 0x3 +#define DCOND_MI 0x4 +#define DCOND_PL 0x5 +#define DCOND_VS 0x6 +#define DCOND_VC 0x7 +#define DCOND_HI 0x8 +#define DCOND_LS 0x9 +#define DCOND_GE 0xa +#define DCOND_LT 0xb +#define DCOND_GT 0xc +#define DCOND_LE 0xd + +#define DCOND_CS DCOND_LO +#define DCOND_CC DCOND_HS + +// unified insn; use right-aligned bit offsets for the bitfields +#define PPC_INSN(op, b10, b15, b20, b31) \ + (((op)<<26)|((b10)<<21)|((b15)<<16)|((b20)<<11)|((b31)<<0)) + +#define _ 0 // marker for "field unused" +#define __(n) o##n // enum marker for "undefined" +#define _CB(v,l,s,d) ((((v)>>(s))&((1<<(l))-1))<<(d)) // copy l bits + +// NB everything privileged or unneeded at 1st sight is left out +// opcode field (encoded in OPCD, bits 0-5) +enum { OP__LMA=004, OP_MULLI=007, + OP_SUBFIC, __(11), OP_CMPLI, OP_CMPI, OP_ADDIC, OP_ADDICF, OP_ADDI, OP_ADDIS, + OP_BC, __(21), OP_B, OP__CR, OP_RLWIMI, OP_RLWINM, __(26), OP_RLWNM, + OP_ORI, OP_ORIS, OP_XORI, OP_XORIS, OP_ANDI, OP_ANDIS, OP__RLD, OP__EXT, + OP_LWZ, OP_LWZU, OP_LBZ, OP_LBZU, OP_STW, OP_STWU, OP_STB, OP_STBU, + OP_LHZ, OP_LHZU, OP_LHA, OP_LHAU, OP_STH, OP_STHU, OP_LMW, OP_STMW, + /*OP_LQ=070,*/ OP__LD=072, OP__ST=076 }; +// CR subops (encoded in bits 21-31) +enum { OPC_MCRF=0, OPC_BCLR=32, OPC_BCCTR=1056 }; +// RLD subops (encoded in XO bits 27-31) +enum { OPR_RLDICL=0, OPR_RLDICR=4, OPR_RLDIC=8, OPR_RLDIMI=12, OPR_RLDCL=16, OPR_RLDCR=18 }; +// EXT subops (encoded in XO bits 21-31) +enum { + // arith/logical + OPE_CMP=0, OPE_SUBFC=16, OPE_ADDC=20, OPE_AND=56, + OPE_CMPL=64, OPE_SUBF=80, OPE_ANDC=120, OPE_NEG=208, OPE_NOR=248, + OPE_SUBFE=272, OPE_ADDE=276, OPE_SUBFZE=400, OPE_ADDZE=404, OPE_SUBFME=464, OPE_ADDME=468, + OPE_ADD=532, OPE_EQV=568, OPE_XOR=632, OPE_ORC=824, OPE_OR=888, OPE_NAND=952, + // shift + OPE_SLW=48, OPE_SLD=54, OPE_SRW=1072, OPE_SRD=1078, 
OPE_SRAW=1584, OPE_SRAD=1588, OPE_SRAWI=1648, OPE_SRADI=1652, + // extend, bitcount + OPE_CNTLZW=52, OPE_CNTLZD=116, OPE_EXTSH=1844, OPE_EXTSB=1908, OPE_EXTSW=1972, + // mult/div + OPE_MULHDU=18, OPE_MULHWU=22, OPE_MULHD=146, OPE_MULHW=150, OPE_MULLD=466, OPE_MULLW=470, + OPE_DIVDU=914, OPE_DIVWU=918, OPE_DIVD=978, OPE_DIVW=982, + // load/store indexed + OPE_LDX=42, OPE_LDUX=106, OPE_STDX=298, OPE_STDUX=362, + OPE_LWZX=46, OPE_LWZUX=110, OPE_LWAX=682, OPE_LWAUX=746, OPE_STWX=302, OPE_STWUX=366, + OPE_LBZX=174, OPE_LBZUX=238, /* no LBAX/LBAUX... */ OPE_STBX=430, OPE_STBUX=494, + OPE_LHZX=558, OPE_LHZUX=622, OPE_LHAX=686, OPE_LHAUX=750, OPE_STHX=814, OPE_STHUX=878, + // SPR, CR related + OPE_ISEL=15, OPE_MFCR=38, OPE_MTCRF=288, OPE_MFSPR=678, OPE_MTSPR=934, OPE_MCRXR=1024, +}; +// LD subops (encoded in XO bits 30-31) +enum { OPL_LD, OPL_LDU, OPL_LWA }; +// ST subops (encoded in XO bits 30-31) +enum { OPS_STD, OPS_STDU /*,OPS_STQ*/ }; + +// X*,M*-forms insns often have overflow detect in b21 and CR0 update in b31 +#define XOE (1<<10) // (31-21) +#define XRC (1<<0) // (31-31) +#define XF (XOE|XRC) +// MB and ME in M*-forms rotate left +#define MM(b,e) (((b)<<6)|((e)<<1)) +#define MD(b,s) (_CB(b,5,0,6)|_CB(b,1,5,5)|_CB(s,5,0,11)|_CB(s,1,5,1)) +// AA and LK in I,B-forms branches +#define BAA (1<<1) +#define BLK (1<<0) +// BO and BI condition codes in B-form, BO0-BO4:BI2-BI4 since we only need CR0 +#define BLT 0x60 +#define BGE 0x20 +#define BGT 0x61 +#define BLE 0x21 +#define BEQ 0x62 +#define BNE 0x22 +#define BXX 0xa0 // unconditional, aka always + +#define PPC_NOP \ + PPC_INSN(OP_ORI, 0, 0, _, 0) // ori r0, r0, 0 + +// arithmetic/logical + +#define PPC_OP_REG(op, xop, rt, ra, rb) /* X*,M*-form */ \ + PPC_INSN((unsigned)op, rt, ra, rb, xop) +#define PPC_OP_IMM(op, rt, ra, imm) /* D,B,I-form */ \ + PPC_INSN((unsigned)op, rt, ra, _, imm) + +// rt = ra OP rb +#define PPC_ADD_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_ADD,rt,ra,rb) +#define PPC_ADDC_REG(rt, ra, rb) \ + 
PPC_OP_REG(OP__EXT,OPE_ADDC,rt,ra,rb) +#define PPC_SUB_REG(rt, rb, ra) /* NB reversed args (rb-ra) */ \ + PPC_OP_REG(OP__EXT,OPE_SUBF,rt,ra,rb) +#define PPC_SUBC_REG(rt, rb, ra) \ + PPC_OP_REG(OP__EXT,OPE_SUBFC,rt,ra,rb) +#define PPC_NEG_REG(rt, ra) \ + PPC_OP_REG(OP__EXT,OPE_NEG,rt,ra,_) + +#define PPC_CMP_REG(ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_CMP,1,ra,rb) +#define PPC_CMPL_REG(ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_CMPL,1,ra,rb) + +#define PPC_CMPW_REG(ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_CMP,0,ra,rb) +#define PPC_CMPLW_REG(ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_CMPL,0,ra,rb) + +#define PPC_XOR_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_XOR,rt,ra,rb) +#define PPC_OR_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_OR,rt,ra,rb) +#define PPC_ORN_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_ORC,rt,ra,rb) +#define PPC_NOR_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_NOR,rt,ra,rb) +#define PPC_AND_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_AND,rt,ra,rb) +#define PPC_BIC_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_ANDC,rt,ra,rb) + +#define PPC_MOV_REG(rt, ra) \ + PPC_OR_REG(rt, ra, ra) +#define PPC_MVN_REG(rt, ra) \ + PPC_NOR_REG(rt, ra, ra) + +// rt = ra OP rb OP carry +#define PPC_ADC_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_ADDE,rt,ra,rb) +#define PPC_SBC_REG(rt, rb, ra) \ + PPC_OP_REG(OP__EXT,OPE_SUBFE,rt,ra,rb) +#define PPC_NGC_REG(rt, ra) \ + PPC_OP_REG(OP__EXT,OPE_SUBFZE,rt,ra,_) + +// rt = ra SHIFT rb +#define PPC_LSL_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_SLD,rt,ra,rb) +#define PPC_LSR_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_SRD,rt,ra,rb) +#define PPC_ASR_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_SRAD,rt,ra,rb) +#define PPC_ROL_REG(ra, rt, rb) /* rldcl with MB=0, i.e. full 64 bit rotate */ \ + PPC_OP_REG(OP__RLD,OPR_RLDCL,rt,ra,rb) + +#define PPC_LSLW_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_SLW,rt,ra,rb) +#define PPC_LSRW_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_SRW,rt,ra,rb) +#define PPC_ASRW_REG(ra, rt, rb) \ + PPC_OP_REG(OP__EXT,OPE_SRAW,rt,ra,rb) +#define PPC_ROLW_REG(ra, rt, rb) \ + 
PPC_OP_REG(OP_RLWNM,MM(0,31),rt,ra,rb) + +// rt = ra OP (imm16 << (0|16)) +#define PPC_ADD_IMM(rt, ra, imm16) \ + PPC_OP_IMM(OP_ADDI, rt, ra, imm16) +#define PPC_ADDT_IMM(rt, ra, imm16) \ + PPC_OP_IMM(OP_ADDIS, rt, ra, imm16) + +#define PPC_XOR_IMM(ra, rt, imm16) \ + PPC_OP_IMM(OP_XORI, rt, ra, imm16) +#define PPC_XORT_IMM(ra, rt, imm16) \ + PPC_OP_IMM(OP_XORIS, rt, ra, imm16) +#define PPC_OR_IMM(ra, rt, imm16) \ + PPC_OP_IMM(OP_ORI, rt, ra, imm16) +#define PPC_ORT_IMM(ra, rt, imm16) \ + PPC_OP_IMM(OP_ORIS, rt, ra, imm16) + +#define PPC_ANDS_IMM(rt, ra, imm16) \ + PPC_OP_IMM(OP_ANDI, rt, ra, imm16) +#define PPC_ANDTS_IMM(rt, ra, imm16) \ + PPC_OP_IMM(OP_ANDIS, rt, ra, imm16) +#define PPC_CMP_IMM(ra, imm16) \ + PPC_OP_IMM(OP_CMPI, 1, ra, imm16) +#define PPC_CMPL_IMM(ra, imm16) \ + PPC_OP_IMM(OP_CMPLI, 1, ra, imm16) + +#define PPC_CMPW_IMM(ra, imm16) \ + PPC_OP_IMM(OP_CMPI, 0, ra, imm16) +#define PPC_CMPLW_IMM(ra, imm16) \ + PPC_OP_IMM(OP_CMPLI, 0, ra, imm16) + +#define PPC_TST_IMM(rt, imm16) \ + PPC_ANDS_IMM(Z0,ra,imm16) + +#define PPC_MOV_IMM(rt, ra, imm16) \ + PPC_ADD_IMM(rt,ra,imm16) +#define PPC_MOVT_IMM(rt, ra, imm16) \ + PPC_ADDT_IMM(rt,ra,imm16) + +// rt = EXTEND ra +#define PPC_EXTSW_REG(ra, rt) \ + PPC_OP_REG(OP__EXT,OPE_EXTSW,rt,ra,_) +#define PPC_EXTSH_REG(ra, rt) \ + PPC_OP_REG(OP__EXT,OPE_EXTSH,rt,ra,_) +#define PPC_EXTSB_REG(ra, rt) \ + PPC_OP_REG(OP__EXT,OPE_EXTSB,rt,ra,_) +#define PPC_EXTUW_REG(ra, rt) \ + PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(32,0),rt,ra,_) +#define PPC_EXTUH_REG(ra, rt) \ + PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(48,0),rt,ra,_) +#define PPC_EXTUB_REG(ra, rt) \ + PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(56,0),rt,ra,_) + +// rt = ra SHIFT imm5/imm6 +#define PPC_LSL_IMM(ra, rt, bits) \ + PPC_OP_REG(OP__RLD,OPR_RLDICR|MD(63-(bits),bits),rt,ra,_) +#define PPC_LSR_IMM(ra, rt, bits) \ + PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(bits,64-(bits)),rt,ra,_) +#define PPC_ASR_IMM(ra, rt, bits) \ + PPC_OP_REG(OP__EXT,OPE_SRADI|MD(_,bits),rt,ra,_) +#define PPC_ROL_IMM(ra, rt, 
bits) \ + PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(0,bits),rt,ra,_) + +#define PPC_LSLW_IMM(ra, rt, bits) \ + PPC_OP_REG(OP_RLWINM,MM(0,31-(bits)),rt,ra,bits) +#define PPC_LSRW_IMM(ra, rt, bits) \ + PPC_OP_REG(OP_RLWINM,MM(bits,31),rt,ra,32-(bits)) +#define PPC_ASRW_IMM(ra, rt, bits) \ + PPC_OP_REG(OP__EXT,OPE_SRAWI,rt,ra,bits) +#define PPC_ROLW_IMM(ra, rt, bits) \ + PPC_OP_REG(OP_RLWINM,MM(0,31),rt,ra,bits) + +// rt = EXTRACT/INSERT ra +#define PPC_BFX_IMM(ra, rt, lsb, bits) \ + PPC_OP_REG(OP__RLD,OPR_RLDICL|MD(64-(bits),63&(lsb+bits)),rt,ra,_) +#define PPC_BFXD_IMM(ra, rt, lsb, bits) /* extract to high bits, 64 bit */ \ + PPC_OP_REG(OP__RLD,OPR_RLDICR|MD(bits-1,lsb),rt,ra,_) +#define PPC_BFI_IMM(ra, rt, lsb, bits) \ + PPC_OP_REG(OP__RLD,OPR_RLDIMI|MD(lsb,64-(lsb+bits)),rt,ra,_) + +#define PPC_BFXW_IMM(ra, rt, lsb, bits) \ + PPC_OP_REG(OP_RLWINM,MM(32-(bits),31),rt,ra,31&(lsb+bits)) +#define PPC_BFXT_IMM(ra, rt, lsb, bits) /* extract to high bits, 32 bit */ \ + PPC_OP_REG(OP_RLWINM,MM(0,bits-1),rt,ra,lsb) +#define PPC_BFIW_IMM(ra, rt, lsb, bits) \ + PPC_OP_REG(OP_RLWIMI,MM(lsb,lsb+bits-1),rt,ra,32-(lsb+bits)) + +// multiplication; NB in 32 bit results the topmost 32 bits are undefined +#define PPC_MULL(rt, ra, rb) /* 64 bit */ \ + PPC_OP_REG(OP__EXT,OPE_MULLD,rt,ra,rb) +#define PPC_MUL(rt, ra, rb) /* low 32 bit */ \ + PPC_OP_REG(OP__EXT,OPE_MULLW,rt,ra,rb) +#define PPC_MULHS(rt, ra, rb) /* high 32 bit, signed */ \ + PPC_OP_REG(OP__EXT,OPE_MULHW,rt,ra,rb) +#define PPC_MULHU(rt, ra, rb) /* high 32 bit, unsigned */ \ + PPC_OP_REG(OP__EXT,OPE_MULHWU,rt,ra,rb) +// XXX use MAC* insns from the LMA group? 
+ +// branching (only PC-relative) + +#define PPC_B(offs26) \ + PPC_OP_IMM(OP_B,_,_,(offs26)&~3) +#define PPC_BL(offs26) \ + PPC_OP_IMM(OP_B,_,_,((offs26)&~3)|BLK) +#define PPC_RET() \ + PPC_OP_REG(OP__CR,OPC_BCLR,BXX>>3,_,_) +#define PPC_RETCOND(cond) \ + PPC_OP_REG(OP__CR,OPC_BCLR,(cond)>>3,(cond)&0x7,_) +#define PPC_BCTRCOND(cond) \ + PPC_OP_REG(OP__CR,OPC_BCCTR,(cond)>>3,(cond)&0x7,_) +#define PPC_BLCTRCOND(cond) \ + PPC_OP_REG(OP__CR,OPC_BCCTR|BLK,(cond)>>3,(cond)&0x7,_) +#define PPC_BCOND(cond, offs19) \ + PPC_OP_IMM(OP_BC,(cond)>>3,(cond)&0x7,(offs19)&~3) + +// load/store, offset + +#define PPC_LDX_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP__LD,rt,ra,((u16)(offs16)&~3)|OPL_LD) +#define PPC_LDW_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_LWZ,rt,ra,(u16)(offs16)) +#define PPC_LDH_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_LHZ,rt,ra,(u16)(offs16)) +#define PPC_LDB_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_LBZ,rt,ra,(u16)(offs16)) + +#define PPC_LDSH_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_LHA,rt,ra,(u16)(offs16)) + +#define PPC_STX_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP__ST,rt,ra,((u16)(offs16)&~3)|OPS_STD) +#define PPC_STW_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_STW,rt,ra,(u16)(offs16)) +#define PPC_STH_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_STH,rt,ra,(u16)(offs16)) +#define PPC_STB_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_STB,rt,ra,(u16)(offs16)) + +#define PPC_STXU_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP__ST,rt,ra,((u16)(offs16)&~3)|OPS_STDU) +#define PPC_STWU_IMM(rt, ra, offs16) \ + PPC_OP_IMM(OP_STWU,rt,ra,(u16)(offs16)) + +// load/store, indexed + +#define PPC_LDX_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_LDX,rt,ra,rb) +#define PPC_LDW_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_LWZX,rt,ra,rb) +#define PPC_LDH_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_LHZX,rt,ra,rb) +#define PPC_LDB_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_LBZX,rt,ra,rb) + +#define PPC_LDSH_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_LHAX,rt,ra,rb) + +#define PPC_STX_REG(rt, ra, rb) \ + 
PPC_OP_REG(OP__EXT,OPE_STX,rt,ra,rb) +#define PPC_STW_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_STWX,rt,ra,rb) +#define PPC_STH_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_STHX,rt,ra,rb) +#define PPC_STB_REG(rt, ra, rb) \ + PPC_OP_REG(OP__EXT,OPE_STBX,rt,ra,rb) + +// special regs: LR, CTR, XER, CR + +#define PPC_MFSP_REG(rt, spr) \ + PPC_OP_REG(OP__EXT,OPE_MFSPR,rt,_,_CB(-(spr),5,0,5)|_CB(-(spr),5,5,0)) +#define PPC_MTSP_REG(rs, spr) \ + PPC_OP_REG(OP__EXT,OPE_MTSPR,rs,_,_CB(-(spr),5,0,5)|_CB(-(spr),5,5,0)) + +#define PPC_MFCR_REG(rt) \ + PPC_OP_REG(OP__EXT,OPE_MFCR,rt,_,_) +#define PPC_MTCRF_REG(rs, fm) \ + PPC_OP_REG(OP__EXT,OPE_MTCRF,rs,_,(fm)<<1) +#define PPC_MCRXR_REG(crt) \ + PPC_OP_REG(OP__EXT,OPE_MCRXR,(crt)<<2,_,_) +#define PPC_MCRCR_REG(crt, crf) \ + PPC_OP_REG(OP__CR,OPC_MCRF,(crt)<<2,(crf)<<1,_) + +#ifdef __powerpc64__ +#define PTR_SCALE 3 +#define PPC_LDP_IMM PPC_LDX_IMM +#define PPC_LDP_REG PPC_LDX_REG +#define PPC_STP_IMM PPC_STX_IMM +#define PPC_STP_REG PPC_STX_REG +#define PPC_STPU_IMM PPC_STXU_IMM +#define PPC_BFXP_IMM PPC_BFX_IMM + +#define emith_uext_ptr(r) EMIT(PPC_EXTUW_REG(r, r)) + +// "long" multiplication, 32x32 bit = 64 bit +#define EMIT_PPC_MULLU_REG(dlo, dhi, s1, s2) do { \ + int at = (dlo == s1 || dlo == s2 ? 
AT : dlo); \ + EMIT(PPC_MUL(at, s1, s2)); \ + EMIT(PPC_MULHU(dhi, s1, s2)); \ + if (at != dlo) emith_move_r_r(dlo, at); \ +} while (0) + +#define EMIT_PPC_MULLS_REG(dlo, dhi, s1, s2) do { \ + EMIT(PPC_MUL(dlo, s1, s2)); \ + EMIT(PPC_ASR_IMM(dhi, dlo, 32)); \ +} while (0) + +#define EMIT_PPC_MACLS_REG(dlo, dhi, s1, s2) do { \ + EMIT(PPC_MUL(AT, s1, s2)); \ + EMIT(PPC_BFI_IMM(dlo, dhi, 0, 32)); \ + emith_add_r_r(dlo, AT); \ + EMIT(PPC_ASR_IMM(dhi, dlo, 32)); \ +} while (0) +#else +#define PTR_SCALE 2 +#define PPC_LDP_IMM PPC_LDW_IMM +#define PPC_LDP_REG PPC_LDW_REG +#define PPC_STP_IMM PPC_STW_IMM +#define PPC_STP_REG PPC_STW_REG +#define PPC_STPU_IMM PPC_STWU_IMM +#define PPC_BFXP_IMM PPC_BFXW_IMM + +#define emith_uext_ptr(r) /**/ + +// "long" multiplication, 32x32 bit = 64 bit +#define EMIT_PPC_MULLU_REG(dlo, dhi, s1, s2) do { \ + int at = (dlo == s1 || dlo == s2 ? AT : dlo); \ + EMIT(PPC_MUL(at, s1, s2)); \ + EMIT(PPC_MULHU(dhi, s1, s2)); \ + if (at != dlo) emith_move_r_r(dlo, at); \ +} while (0) + +#define EMIT_PPC_MULLS_REG(dlo, dhi, s1, s2) do { \ + int at = (dlo == s1 || dlo == s2 ? AT : dlo); \ + EMIT(PPC_MUL(at, s1, s2)); \ + EMIT(PPC_MULHS(dhi, s1, s2)); \ + if (at != dlo) emith_move_r_r(dlo, at); \ +} while (0) + +#define EMIT_PPC_MACLS_REG(dlo, dhi, s1, s2) do { \ + int t_ = rcache_get_tmp(); \ + EMIT_PPC_MULLS_REG(t_, AT, s1, s2); \ + EMIT(PPC_ADDC_REG(dlo, dlo, t_)); \ + EMIT(PPC_ADC_REG(dhi, dhi, AT)); \ + rcache_free_tmp(t_); \ +} while (0) +#endif +#define PTR_SIZE (1<>1 since the lowest bit inverts the cond */ \ + unsigned _mv = BITMASK3(DCOND_VS>>1,DCOND_GE>>1,DCOND_GT>>1); \ + unsigned _mc = _mv | BITMASK2(DCOND_HS>>1,DCOND_HI>>1); \ + emith_flg_hint = (_mv & BITMASK1(cond >> 1) ? _FHV : 0); \ + emith_flg_hint |= (_mc & BITMASK1(cond >> 1) ? _FHC : 0); \ +} while (0) + +// store minimal cc information: rt, rb^ra, carry +// NB: the result *must* first go to FNZ, in case rt == ra or rt == rb. 
+// NB: for adcf and sbcf, carry-in must be dealt with separately (see there) +static void emith_set_arith_flags(int rt, int ra, int rb, s32 imm, int sub) +{ + if (emith_flg_hint & _FHC) { + if (sub) // C = sub:rb= 0) // Nt^Ns in FV, bit 31 + EMIT(PPC_XOR_REG(FV, ra, rb)); + else if (imm == 0) + emith_flg_noV = 1; // imm #0 can't overflow + else if ((imm < 0) == !sub) + EMIT(PPC_MVN_REG(FV, ra)); + else if ((imm > 0) == !sub) + EMIT(PPC_MOV_REG(FV, ra)); + } + // full V = Nd^Nt^Ns^C calculation is deferred until really needed + + if (rt && rt != FNZ) + EMIT(PPC_MOV_REG(rt, FNZ)); // N,Z via result value in FNZ + emith_cmp_ra = emith_cmp_rb = -1; +} + +// handle cmp separately by storing the involved regs for later use. +// this works for all conditions but VC/VS, but this is fortunately never used. +static void emith_set_compare_flags(int ra, int rb, s32 imm) +{ + emith_cmp_rb = rb; + emith_cmp_ra = ra; + emith_cmp_imm = imm; +} + + +// data processing, register + +#define emith_move_r_r_ptr(d, s) \ + EMIT(PPC_MOV_REG(d, s)) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) + +#define emith_move_r_r(d, s) \ + emith_move_r_r_ptr(d, s) +#define emith_move_r_r_c(cond, d, s) \ + emith_move_r_r(d, s) + +#define emith_mvn_r_r(d, s) \ + EMIT(PPC_MVN_REG(d, s)) + +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_ADD_REG(d, s1, AT)); \ + } else EMIT(PPC_ADD_REG(d, s1, s2)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) \ + emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) + +#define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSRW_IMM(AT, s2, simm)); \ + EMIT(PPC_ADD_REG(d, s1, AT)); \ + } else EMIT(PPC_ADD_REG(d, s1, s2)); \ +} while (0) + +#define emith_addf_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + 
EMIT(PPC_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) +#define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(PPC_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_addf_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSRW_IMM(AT, s2, simm)); \ + EMIT(PPC_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(PPC_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_sub_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_SUB_REG(d, s1, AT)); \ + } else EMIT(PPC_SUB_REG(d, s1, s2)); \ +} while (0) + +#define emith_subf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_SUB_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 1); \ + } else { \ + EMIT(PPC_SUB_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + } \ +} while (0) + +#define emith_or_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_OR_REG(d, s1, AT)); \ + } else EMIT(PPC_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSRW_IMM(AT, s2, simm)); \ + EMIT(PPC_OR_REG(d, s1, AT)); \ + } else EMIT(PPC_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_XOR_REG(d, s1, AT)); \ + } else EMIT(PPC_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSRW_IMM(AT, s2, simm)); \ + EMIT(PPC_XOR_REG(d, s1, AT)); \ + } else EMIT(PPC_XOR_REG(d, s1, s2)); \ +} while (0) + +#define 
emith_and_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(PPC_LSLW_IMM(AT, s2, simm)); \ + EMIT(PPC_AND_REG(d, s1, AT)); \ + } else EMIT(PPC_AND_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_eor_r_r_lsl(d, s, lslimm) \ + emith_eor_r_r_r_lsl(d, d, s, lslimm) +#define emith_eor_r_r_lsr(d, s, lsrimm) \ + emith_eor_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_add_r_r_r(d, s1, s2) \ + emith_add_r_r_r_lsl(d, s1, s2, 0) + +#define emith_addf_r_r_r_ptr(d, s1, s2) \ + emith_addf_r_r_r_lsl_ptr(d, s1, s2, 0) +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_sub_r_r_r(d, s1, s2) \ + emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_or_r_r_r(d, s1, s2) \ + emith_or_r_r_r_lsl(d, s1, s2, 0) + +#define emith_eor_r_r_r(d, s1, s2) \ + emith_eor_r_r_r_lsl(d, s1, s2, 0) + +#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r_lsl_ptr(d, d, s, 0) +#define emith_add_r_r(d, s) \ + emith_add_r_r_r(d, d, s) + +#define emith_sub_r_r(d, s) \ + emith_sub_r_r_r(d, d, s) + +#define emith_neg_r_r(d, s) \ + EMIT(PPC_NEG_REG(d, s)) + +#define emith_adc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_add_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_sbc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_sub_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_adc_r_r(d, s) \ + emith_adc_r_r_r(d, d, s) + +#define emith_negc_r_r(d, s) do { \ + emith_neg_r_r(d, s); \ + emith_sub_r_r(d, FC); \ +} while (0) + +// NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW) +// moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't cause another Cout +#define emith_adcf_r_r_r(d, s1, s2) do { \ + 
emith_add_r_r_r(FNZ, s2, FC); \ + EMIT_PPC_SLTWU_REG(AT, FNZ, FC); \ + emith_add_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_sbcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT_PPC_SLTWU_REG(AT, FNZ, FC); \ + emith_sub_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_and_r_r(d, s) \ + emith_and_r_r_r(d, d, s) +#define emith_and_r_r_c(cond, d, s) \ + emith_and_r_r(d, s) + +#define emith_or_r_r(d, s) \ + emith_or_r_r_r(d, d, s) + +#define emith_eor_r_r(d, s) \ + emith_eor_r_r_r(d, d, s) + +#define emith_tst_r_r_ptr(d, s) do { \ + if (d != s) { \ + emith_and_r_r_r(FNZ, d, s); \ + emith_cmp_ra = emith_cmp_rb = -1; \ + } else emith_cmp_ra = s, emith_cmp_rb = -1, emith_cmp_imm = 0; \ +} while (0) +#define emith_tst_r_r(d, s) \ + emith_tst_r_r_ptr(d, s) + +#define emith_teq_r_r(d, s) do { \ + emith_eor_r_r_r(FNZ, d, s); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_cmp_r_r(d, s) \ + emith_set_compare_flags(d, s, 0) +// emith_subf_r_r_r(FNZ, d, s) + +#define emith_addf_r_r(d, s) \ + emith_addf_r_r_r(d, d, s) + +#define emith_subf_r_r(d, s) \ + emith_subf_r_r_r(d, d, s) + +#define emith_adcf_r_r(d, s) \ + emith_adcf_r_r_r(d, d, s) + +#define emith_sbcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, d, s) + +#define emith_negcf_r_r(d, s) do { \ + emith_add_r_r_r(FNZ, s, FC); \ + EMIT_PPC_SLTWU_REG(AT, FNZ, FC); \ + emith_neg_r_r(FNZ, FNZ); \ + emith_set_arith_flags(d, Z0, s, 0, 1); \ + emith_or_r_r(FC, AT); \ +} while (0) + +// move immediate + +static void emith_move_imm(int r, int ptr, uintptr_t imm) +{ +#ifdef __powerpc64__ + if (ptr && (s32)imm != imm) { + emith_move_imm(r, 0, imm >> 32); + if (imm >> 32) + EMIT(PPC_LSL_IMM(r, r, 32)); + if (imm & 0x0000ffff) + EMIT(PPC_OR_IMM(r, r, imm & 0x0000ffff)); + if (imm & 0xffff0000) + EMIT(PPC_ORT_IMM(r, r, (imm & 0xffff0000) >> 16)); + } else 
+#endif + if ((s16)imm != (s32)imm) { + EMIT(PPC_ADDT_IMM(r, Z0, (u16)(imm>>16))); + if ((s16)imm) + EMIT(PPC_OR_IMM(r, r, (u16)(imm))); + } else EMIT(PPC_ADD_IMM(r, Z0, (u16)imm)); +} + +#define emith_move_r_ptr_imm(r, imm) \ + emith_move_imm(r, 1, (uintptr_t)(imm)) + +#define emith_move_r_imm(r, imm) \ + emith_move_imm(r, 0, (u32)(imm)) +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm) + +#define emith_move_r_imm_s8_patchable(r, imm) \ + EMIT(PPC_ADD_IMM(r, Z0, (s8)(imm))) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u32 *ptr_ = (u32 *)ptr; \ + EMIT_PTR(ptr_, (*ptr_ & 0xffff0000) | (u16)(s8)(imm)); \ +} while (0) + +// arithmetic, immediate - can only be ADDI, since SUBI doesn't exist + +static void emith_add_imm(int rt, int ra, u32 imm) +{ + int s = ra; + if ((u16)imm) { + EMIT(PPC_ADD_IMM(rt, s, (u16)imm)); + s = rt; + } + // adjust for sign extension in ADDI + imm = (imm >> 16) + ((s16)imm < 0); + if ((u16)imm || rt != s) + EMIT(PPC_ADDT_IMM(rt, s, (u16)imm)); +} + +#define emith_add_r_imm(r, imm) \ + emith_add_r_r_imm(r, r, imm) +#define emith_add_r_imm_c(cond, r, imm) \ + emith_add_r_imm(r, imm) + +#define emith_addf_r_imm(r, imm) \ + emith_addf_r_r_imm(r, imm) + +#define emith_sub_r_imm(r, imm) \ + emith_sub_r_r_imm(r, r, imm) +#define emith_sub_r_imm_c(cond, r, imm) \ + emith_sub_r_imm(r, imm) + +#define emith_subf_r_imm(r, imm) \ + emith_subf_r_r_imm(r, r, imm) + +#define emith_adc_r_imm(r, imm) \ + emith_adc_r_r_imm(r, r, imm) + +#define emith_adcf_r_imm(r, imm) \ + emith_adcf_r_r_imm(r, r, imm) + +#define emith_cmp_r_imm(r, imm) \ + emith_set_compare_flags(r, -1, imm) +// emith_subf_r_r_imm(FNZ, r, (s16)imm) + +#define emith_add_r_r_ptr_imm(d, s, imm) \ + emith_add_imm(d, s, imm) + +#define emith_add_r_r_imm(d, s, imm) \ + emith_add_r_r_ptr_imm(d, s, imm) + +#define emith_addf_r_r_imm(d, s, imm) do { \ + emith_add_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, -1, imm, 0); \ +} while (0) + +#define 
emith_adc_r_r_imm(d, s, imm) do { \ + emith_add_r_r_r(AT, s, FC); \ + emith_add_r_r_imm(d, AT, imm); \ +} while (0) + + +#define emith_adcf_r_r_imm(d, s, imm) do { \ + if (imm == 0) { \ + emith_add_r_r_r(FNZ, s, FC); \ + emith_set_arith_flags(d, s, -1, 1, 0); \ + } else { \ + emith_add_r_r_r(FNZ, s, FC); \ + EMIT_PPC_SLTWU_REG(AT, FNZ, FC); \ + emith_add_r_r_imm(FNZ, FNZ, imm); \ + emith_set_arith_flags(d, s, -1, imm, 0); \ + emith_or_r_r(FC, AT); \ + } \ +} while (0) + +// NB: no SUBI, since ADDI takes a signed imm +#define emith_sub_r_r_imm(d, s, imm) \ + emith_add_r_r_imm(d, s, -(imm)) +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_sub_r_r_imm(d, s, imm) + +#define emith_subf_r_r_imm(d, s, imm) do { \ + emith_sub_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, -1, imm, 1); \ +} while (0) + +// logical, immediate + +#define emith_log_imm2(opi, opr, rt, ra, imm) do { \ + if ((imm) >> 16 || opi == OP_ANDI) { /* too big, or microcoded ANDI */ \ + emith_move_r_imm(AT, imm); \ + EMIT(PPC_OP_REG(OP__EXT, opr, ra, rt, AT)); \ + } else if (/*opi == OP_ANDI ||*/ imm || rt != ra) \ + EMIT(PPC_OP_IMM(opi, ra, rt, imm)); \ +} while (0) +#define emith_log_imm(op, rt, ra, imm) \ + emith_log_imm2(OP_##op##I, OPE_##op, rt, ra, imm) + +#define emith_and_r_imm(r, imm) \ + emith_log_imm(AND, r, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_log_imm(OR, r, r, imm) +#define emith_or_r_imm_c(cond, r, imm) \ + emith_or_r_imm(r, imm) + +#define emith_eor_r_imm_ptr(r, imm) \ + emith_log_imm(XOR, r, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_ptr(r, imm) + +#define emith_eor_r_imm(r, imm) \ + emith_eor_r_imm_ptr(r, imm) +#define emith_eor_r_imm_c(cond, r, imm) \ + emith_eor_r_imm(r, imm) + +/* NB: BIC #imm not available; use AND #~imm instead */ +#define emith_bic_r_imm(r, imm) \ + emith_log_imm(AND, r, r, ~(imm)) +#define emith_bic_r_imm_c(cond, r, imm) \ + emith_bic_r_imm(r, imm) + +#define emith_tst_r_imm(r, imm) do { \ + 
emith_log_imm(AND, FNZ, r, imm); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_tst_r_imm(r, imm) + +#define emith_and_r_r_imm(d, s, imm) \ + emith_log_imm(AND, d, s, imm) + +#define emith_or_r_r_imm(d, s, imm) \ + emith_log_imm(OR, d, s, imm) + +#define emith_eor_r_r_imm(d, s, imm) \ + emith_log_imm(XOR, d, s, imm) + +// shift + +#define emith_lsl(d, s, cnt) \ + EMIT(PPC_LSLW_IMM(d, s, cnt)) + +#define emith_lsr(d, s, cnt) \ + EMIT(PPC_LSRW_IMM(d, s, cnt)) + +#define emith_asr(d, s, cnt) \ + EMIT(PPC_ASRW_IMM(d, s, cnt)) + +#define emith_ror(d, s, cnt) \ + EMIT(PPC_ROLW_IMM(d, s, 32-(cnt))) +#define emith_ror_c(cond, d, s, cnt) \ + emith_ror(d, s, cnt) + +#define emith_rol(d, s, cnt) \ + EMIT(PPC_ROLW_IMM(d, s, cnt)); \ + +#define emith_rorc(d) do { \ + emith_lsr(d, d, 1); \ + emith_lsl(AT, FC, 31); \ + emith_or_r_r(d, AT); \ +} while (0) + +#define emith_rolc(d) do { \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ +} while (0) + +// NB: all flag setting shifts make V undefined +#define emith_lslf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsl(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_lsr(FC, _s, 31); \ + emith_lsl(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_lsrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_lsr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_asrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_asr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_asr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_rolf(d, s, cnt) do { \ + emith_rol(d, s, cnt); \ + emith_and_r_r_imm(FC, 
d, 1); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_rorf(d, s, cnt) do { \ + emith_ror(d, s, cnt); \ + emith_lsr(FC, d, 31); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_rolcf(d) do { \ + emith_lsr(AT, d, 31); \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +#define emith_rorcf(d) do { \ + emith_and_r_r_imm(AT, d, 1); \ + emith_lsr(d, d, 1); \ + emith_lsl(FC, FC, 31); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +// signed/unsigned extend + +#define emith_clear_msb(d, s, count) /* bits to clear */ \ + EMIT(PPC_BFXW_IMM(d, s, count, 32-(count))) + +#define emith_clear_msb_c(cond, d, s, count) \ + emith_clear_msb(d, s, count) + +#define emith_sext(d, s, count) /* bits to keep */ do { \ + if (count == 8) \ + EMIT(PPC_EXTSB_REG(d, s)); \ + else if (count == 16) \ + EMIT(PPC_EXTSH_REG(d, s)); \ + else { \ + emith_lsl(d, s, 32-(count)); \ + emith_asr(d, d, 32-(count)); \ + } \ +} while (0) + +// multiply Rd = Rn*Rm (+ Ra) + +#define emith_mul(d, s1, s2) \ + EMIT(PPC_MUL(d, s1, s2)) + +#define emith_mul_u64(dlo, dhi, s1, s2) \ + EMIT_PPC_MULLU_REG(dlo, dhi, s1, s2) + +#define emith_mul_s64(dlo, dhi, s1, s2) \ + EMIT_PPC_MULLS_REG(dlo, dhi, s1, s2) + +#define emith_mula_s64(dlo, dhi, s1, s2) \ + EMIT_PPC_MACLS_REG(dlo, dhi, s1, s2) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + emith_mula_s64(dlo, dhi, s1, s2) + +// load/store. 
offs has 16 bits signed, which is currently sufficient +#define emith_read_r_r_offs_ptr(r, ra, offs) \ + EMIT(PPC_LDP_IMM(r, ra, offs)) +#define emith_read_r_r_offs_ptr_c(cond, r, ra, offs) \ + emith_read_r_r_offs_ptr(r, ra, offs) + +#define emith_read_r_r_offs(r, ra, offs) \ + EMIT(PPC_LDW_IMM(r, ra, offs)) +#define emith_read_r_r_offs_c(cond, r, ra, offs) \ + emith_read_r_r_offs(r, ra, offs) + +#define emith_read_r_r_r_ptr(r, ra, rm) \ + EMIT(PPC_LDP_REG(r, ra, rm)) + +#define emith_read_r_r_r(r, ra, rm) \ + EMIT(PPC_LDW_REG(r, ra, rm)) +#define emith_read_r_r_r_c(cond, r, ra, rm) \ + emith_read_r_r_r(r, ra, rm) + +#define emith_read8_r_r_offs(r, ra, offs) \ + EMIT(PPC_LDB_IMM(r, ra, offs)) +#define emith_read8_r_r_offs_c(cond, r, ra, offs) \ + emith_read8_r_r_offs(r, ra, offs) + +#define emith_read8_r_r_r(r, ra, rm) \ + EMIT(PPC_LDB_REG(r, ra, rm)) +#define emith_read8_r_r_r_c(cond, r, ra, rm) \ + emith_read8_r_r_r(r, ra, rm) + +#define emith_read16_r_r_offs(r, ra, offs) \ + EMIT(PPC_LDH_IMM(r, ra, offs)) +#define emith_read16_r_r_offs_c(cond, r, ra, offs) \ + emith_read16_r_r_offs(r, ra, offs) + +#define emith_read16_r_r_r(r, ra, rm) \ + EMIT(PPC_LDH_REG(r, ra, rm)) +#define emith_read16_r_r_r_c(cond, r, ra, rm) \ + emith_read16_r_r_r(r, ra, rm) + +#define emith_read8s_r_r_offs(r, ra, offs) do { \ + EMIT(PPC_LDB_IMM(r, ra, offs)); \ + EMIT(PPC_EXTSB_REG(r, r)); \ +} while (0) +#define emith_read8s_r_r_offs_c(cond, r, ra, offs) \ + emith_read8s_r_r_offs(r, ra, offs) + +#define emith_read8s_r_r_r(r, ra, rm) do { \ + EMIT(PPC_LDB_REG(r, ra, rm)); \ + EMIT(PPC_EXTSB_REG(r, r)); \ +} while (0) +#define emith_read8s_r_r_r_c(cond, r, ra, rm) \ + emith_read8s_r_r_r(r, ra, rm) + +#define emith_read16s_r_r_offs(r, ra, offs) do { \ + EMIT(PPC_LDH_IMM(r, ra, offs)); \ + EMIT(PPC_EXTSH_REG(r, r)); \ +} while (0) +#define emith_read16s_r_r_offs_c(cond, r, ra, offs) \ + emith_read16s_r_r_offs(r, ra, offs) + +#define emith_read16s_r_r_r(r, ra, rm) do { \ + EMIT(PPC_LDH_REG(r, 
ra, rm)); \ + EMIT(PPC_EXTSH_REG(r, r)); \ +} while (0) +#define emith_read16s_r_r_r_c(cond, r, ra, rm) \ + emith_read16s_r_r_r(r, ra, rm) + + +#define emith_write_r_r_offs_ptr(r, ra, offs) \ + EMIT(PPC_STP_IMM(r, ra, offs)) +#define emith_write_r_r_offs_ptr_c(cond, r, ra, offs) \ + emith_write_r_r_offs_ptr(r, ra, offs) + +#define emith_write_r_r_r_ptr(r, ra, rm) \ + EMIT(PPC_STP_REG(r, ra, rm)) +#define emith_write_r_r_r_ptr_c(cond, r, ra, rm) \ + emith_write_r_r_r_ptr(r, ra, rm) + +#define emith_write_r_r_offs(r, ra, offs) \ + EMIT(PPC_STW_IMM(r, ra, offs)) +#define emith_write_r_r_offs_c(cond, r, ra, offs) \ + emith_write_r_r_offs(r, ra, offs) + +#define emith_write_r_r_r(r, ra, rm) \ + EMIT(PPC_STW_REG(r, ra, rm)) +#define emith_write_r_r_r_c(cond, r, ra, rm) \ + emith_write_r_r_r(r, ra, rm) + +#define emith_ctx_read_ptr(r, offs) \ + emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_read(r, offs) \ + emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) + +#define emith_ctx_write_ptr(r, offs) \ + emith_write_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_write(r, offs) \ + emith_write_r_r_offs(r, CONTEXT_REG, offs) + +#define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_read(r_, offs_); \ +} while (0) + +#define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_write(r_, offs_); \ +} while (0) + +// function call handling +#define emith_save_caller_regs(mask) do { \ + int _c, _z = PTR_SIZE; u32 _m = mask & 0x1ff8; /* r3-r12 */ \ + if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align */ \ + int _s = count_bits(_m) * _z, _o = _s; \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o 
-= _z; if (_c) emith_write_r_r_offs_ptr(_c, SP, _o); } \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + int _c, _z = PTR_SIZE; u32 _m = mask & 0x1ff8; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; \ + int _s = count_bits(_m) * _z, _o = 0; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ +} while (0) + +#if defined __PS3__ +// on PS3 a C function pointer points to an array of 2 ptrs containing the start +// address and the TOC pointer for this function. TOC isn't used by the DRC though. +static void *fptr[2]; +#define host_call(addr, args) (fptr[0] = addr, (void (*) args)fptr) +#else +// with ELF we have the PLT which wraps functions needing any ABI register setup, +// hence a function ptr is simply the entry address of the function to execute. +#define host_call(addr, args) addr +#endif + +#define host_arg2reg(rt, arg) \ + rt = (arg+3) + +#define emith_pass_arg_r(arg, reg) \ + emith_move_r_r_ptr(arg, reg) + +#define emith_pass_arg_imm(arg, imm) \ + emith_move_r_ptr_imm(arg, imm) + +// branching +#define emith_invert_branch(cond) /* inverted conditional branch */ \ + ((cond) ^ 0x40) + +// evaluate the emulated condition, returns a register/branch type pair +static int emith_cmpr_check(int rs, int rt, int cond, u32 *op) +{ + int b = -1; + + // condition check for comparing 2 registers + switch (cond) { + case DCOND_EQ: *op = PPC_CMPW_REG(rs, rt); b = BEQ; break; + case DCOND_NE: *op = PPC_CMPW_REG(rs, rt); b = BNE; break; + case DCOND_LO: *op = PPC_CMPLW_REG(rs, rt); b = BLT; break; + case DCOND_HS: *op = PPC_CMPLW_REG(rs, rt); b = BGE; break; + case DCOND_LS: *op = PPC_CMPLW_REG(rs, rt); b = BLE; break; + case DCOND_HI: *op = PPC_CMPLW_REG(rs, rt); b = BGT; break; + case DCOND_LT: *op = PPC_CMPW_REG(rs, rt); b = BLT; break; + case DCOND_GE: *op = PPC_CMPW_REG(rs, rt); b = BGE; break; + case DCOND_LE: 
*op = PPC_CMPW_REG(rs, rt); b = BLE; break; + case DCOND_GT: *op = PPC_CMPW_REG(rs, rt); b = BGT; break; + } + + return b; +} + +static int emith_cmpi_check(int rs, s32 imm, int cond, u32 *op) +{ + int b = -1; + + // condition check for comparing register with immediate + switch (cond) { + case DCOND_EQ: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BEQ; break; + case DCOND_NE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BNE; break; + case DCOND_LO: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = BLT; break; + case DCOND_HS: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = BGE; break; + case DCOND_LS: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = BLE; break; + case DCOND_HI: *op = PPC_CMPLW_IMM(rs, (u16)imm), b = BGT; break; + case DCOND_LT: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BLT; break; + case DCOND_GE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BGE; break; + case DCOND_LE: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BLE; break; + case DCOND_GT: *op = PPC_CMPW_IMM(rs, (u16)imm), b = BGT; break; + } + + return b; +} + +static int emith_cond_check(int cond) +{ + int b = -1; + u32 op = 0; + + if (emith_cmp_ra >= 0) { + if (emith_cmp_rb != -1) + b = emith_cmpr_check(emith_cmp_ra,emith_cmp_rb, cond,&op); + else b = emith_cmpi_check(emith_cmp_ra,emith_cmp_imm,cond,&op); + } + + // shortcut for V known to be 0 + if (b < 0 && emith_flg_noV) switch (cond) { + case DCOND_VS: /* no branch */ break; // never + case DCOND_VC: b = BXX; break; // always + case DCOND_LT: op = PPC_CMPW_IMM(FNZ, 0); b = BLT; break; // N + case DCOND_GE: op = PPC_CMPW_IMM(FNZ, 0); b = BGE; break; // !N + case DCOND_LE: op = PPC_CMPW_IMM(FNZ, 0); b = BLE; break; // N || Z + case DCOND_GT: op = PPC_CMPW_IMM(FNZ, 0); b = BGT; break; // !N && !Z + } + + // the full monty if no shortcut + if (b < 0) switch (cond) { + // conditions using NZ + case DCOND_EQ: op = PPC_CMPW_IMM(FNZ, 0); b = BEQ; break; // Z + case DCOND_NE: op = PPC_CMPW_IMM(FNZ, 0); b = BNE; break; // !Z + case DCOND_MI: op = PPC_CMPW_IMM(FNZ, 0); b = BLT; break; // N + case DCOND_PL: op 
= PPC_CMPW_IMM(FNZ, 0); b = BGE; break; // !N + // conditions using C + case DCOND_LO: op = PPC_CMPW_IMM(FC , 0); b = BNE; break; // C + case DCOND_HS: op = PPC_CMPW_IMM(FC , 0); b = BEQ; break; // !C + // conditions using CZ + case DCOND_LS: // C || Z + case DCOND_HI: // !C && !Z + EMIT(PPC_ADD_IMM(AT, FC, -1)); // !C && !Z + EMIT(PPC_AND_REG(AT, FNZ, AT)); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_HI ? BNE : BEQ); + break; + + // conditions using V + case DCOND_VS: // V + case DCOND_VC: // !V + EMIT(PPC_XOR_REG(AT, FV, FNZ)); // V = Nt^Ns^Nd^C + EMIT(PPC_LSRW_IMM(AT, AT, 31)); + EMIT(PPC_XOR_REG(AT, AT, FC)); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_VS ? BNE : BEQ); + break; + // conditions using VNZ + case DCOND_LT: // N^V + case DCOND_GE: // !(N^V) + EMIT(PPC_LSRW_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(PPC_XOR_REG(AT, FC, AT)); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_LT ? BNE : BEQ); + break; + case DCOND_LE: // (N^V) || Z + case DCOND_GT: // !(N^V) && !Z + EMIT(PPC_LSRW_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(PPC_XOR_REG(AT, FC, AT)); + EMIT(PPC_ADD_IMM(AT, AT, -1)); // !(Nd^V) && !Z + EMIT(PPC_AND_REG(AT, FNZ, AT)); + op = PPC_CMPW_IMM(AT , 0); b = (cond == DCOND_GT ? 
BNE : BEQ);
+        break;
+    }
+
+    // emit the compare selected above (op stays 0 if none is needed)
+    if (op) EMIT(op);
+    return b;
+}
+
+// Emit an unconditional relative branch to target. The byte displacement is
+// measured from the current emit position (tcache_ptr) and masked to 26 bits.
+#define emith_jump(target) do { \
+    u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \
+    EMIT(PPC_B((uintptr_t)disp_ & 0x03ffffff)); \
+} while (0)
+#define emith_jump_patchable(target) \
+    emith_jump(target)
+
+// NB: PPC conditional branches have only +/- 32KB range (16 bit signed
+// displacement, cf. the 0x0000ffff mask and emith_jump_cond_inrange)
+// Emit a conditional branch to target. emith_cond_check() may emit compare
+// code first; it returns the branch condition, or a negative value if no
+// branch is to be emitted at all.
+// NOTE(review): disp_ is computed after emith_cond_check(), presumably because
+// EMIT advances tcache_ptr - confirm.
+#define emith_jump_cond(cond, target) do { \
+    int mcond_ = emith_cond_check(cond); \
+    u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \
+    if (mcond_ >= 0) EMIT(PPC_BCOND(mcond_,disp_ & 0x0000ffff)); \
+} while (0)
+#define emith_jump_cond_patchable(cond, target) \
+    emith_jump_cond(cond, target)
+
+// True if target can be reached by a conditional branch emitted at tcache_ptr.
+// The 0x14 bytes of slack on the backward side cover the up to 5 insns
+// (4 bytes each) emith_cond_check() may emit in front of the branch.
+#define emith_jump_cond_inrange(target) \
+    ((u8 *)target - (u8 *)tcache_ptr < 0x8000 && \
+     (u8 *)target - (u8 *)tcache_ptr >= -0x8000+0x14) //mind cond_check
+
+// NB: returns position of patch for cache maintenance
+// Scans forward from ptr to the first B or BC instruction (skipping condition
+// check code) and computes the new displacement relative to that instruction.
+#define emith_jump_patch(ptr, target, pos) do { \
+    u32 *ptr_ = (u32 *)ptr; /* must skip condition check code */ \
+    u32 disp_, mask_; \
+    while (*ptr_>>26 != OP_BC && *ptr_>>26 != OP_B) ptr_ ++; \
+    disp_ = (u8 *)target - (u8 *)ptr_; \
+    mask_ = (*ptr_>>26 == OP_BC ?
0xffff0003 : 0xfc000003); \ + EMIT_PTR(ptr_, (*ptr_ & mask_) | (disp_ & ~mask_)); \ + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ +} while (0) + +#define emith_jump_patch_inrange(ptr, target) \ + ((u8 *)target - (u8 *)ptr < 0x8000 && \ + (u8 *)target - (u8 *)ptr >= -0x8000+0x10) // mind cond_check +#define emith_jump_patch_size() 4 + +#define emith_jump_at(ptr, target) do { \ + u32 disp_ = (u8 *)target - (u8 *)ptr; \ + u32 *ptr_ = (u32 *)ptr; \ + EMIT_PTR(ptr_, PPC_B((uintptr_t)disp_ & 0x03ffffff)); \ +} while (0) +#define emith_jump_at_size() 4 + +#define emith_jump_reg(r) do { \ + EMIT(PPC_MTSP_REG(r, CTR)); \ + EMIT(PPC_BCTRCOND(BXX)); \ +} while(0) +#define emith_jump_reg_c(cond, r) \ + emith_jump_reg(r) + +#define emith_jump_ctx(offs) do { \ + emith_ctx_read_ptr(CR, offs); \ + emith_jump_reg(CR); \ +} while (0) +#define emith_jump_ctx_c(cond, offs) \ + emith_jump_ctx(offs) + +#define emith_call(target) do { \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(PPC_BL((uintptr_t)disp_ & 0x03ffffff)); \ +} while(0) +#define emith_call_cond(cond, target) \ + emith_call(target) + +#define emith_call_reg(r) do { \ + EMIT(PPC_MTSP_REG(r, CTR)); \ + EMIT(PPC_BLCTRCOND(BXX)); \ +} while(0) + +#define emith_abicall_ctx(offs) do { \ + emith_ctx_read_ptr(CR, offs); \ + emith_abicall_reg(CR); \ +} while (0) + +#ifdef __PS3__ +#define emith_abijump_reg(r) \ + emith_read_r_r_offs_ptr(TOC_REG, r, PTR_SIZE); \ + emith_read_r_r_offs_ptr(CR, r, 0); \ + emith_jump_reg(CR) +#else +#define emith_abijump_reg(r) \ + if ((r) != CR) emith_move_r_r(CR, r); \ + emith_jump_reg(CR) +#endif +#define emith_abijump_reg_c(cond, r) \ + emith_abijump_reg(r) +#define emith_abicall(target) \ + emith_move_r_ptr_imm(CR, target); \ + emith_abicall_reg(CR); +#define emith_abicall_cond(cond, target) \ + emith_abicall(target) +#ifdef __PS3__ +#define emith_abicall_reg(r) do { \ + emith_read_r_r_offs_ptr(TOC_REG, r, PTR_SIZE); \ + emith_read_r_r_offs_ptr(CR, r, 0); \ + 
emith_call_reg(CR); \ +} while(0) +#else +#define emith_abicall_reg(r) do { \ + if ((r) != CR) emith_move_r_r(CR, r); \ + emith_call_reg(CR); \ +} while(0) +#endif + +#define emith_call_cleanup() /**/ + +#define emith_ret() \ + EMIT(PPC_RET()) +#define emith_ret_c(cond) \ + emith_ret() + +#define emith_ret_to_ctx(offs) do { \ + EMIT(PPC_MFSP_REG(AT, LR)); \ + emith_ctx_write_ptr(AT, offs); \ +} while (0) + +#define emith_add_r_ret(r) do { \ + EMIT(PPC_MFSP_REG(AT, LR)); \ + emith_add_r_r_ptr(r, AT); \ +} while (0) + +// NB: ABI SP alignment is 16 in 64 bit mode +#define emith_push_ret(r) do { \ + int offs_ = 16 - 2*PTR_SIZE; \ + emith_add_r_r_ptr_imm(SP, SP, -16); \ + EMIT(PPC_MFSP_REG(AT, LR)); \ + emith_write_r_r_offs_ptr(AT, SP, offs_ + PTR_SIZE); \ + if ((r) > 0) emith_write_r_r_offs(r, SP, offs_); \ +} while (0) + +#define emith_pop_and_ret(r) do { \ + int offs_ = 16 - 2*PTR_SIZE; \ + if ((r) > 0) emith_read_r_r_offs(r, SP, offs_); \ + emith_read_r_r_offs_ptr(AT, SP, offs_ + PTR_SIZE); \ + EMIT(PPC_MTSP_REG(AT, LR)); \ + emith_add_r_r_ptr_imm(SP, SP, 16); \ + emith_ret(); \ +} while (0) + + +// this should normally be in libc clear_cache; however, it sometimes isn't. 
+static NOINLINE void host_instructions_updated(void *base, void *end, int force) +{ + int step = 32, lgstep = 5; + char *_base = (char *)((uptr)base & ~(step-1)); + int count = (((char *)end - _base) >> lgstep) + 1; + + if (count <= 0) count = 1; // make sure count is positive + base = _base; + + asm volatile( + " mtctr %1;" + "0: dcbst 0,%0;" + " add %0, %0, %2;" + " bdnz 0b;" + " sync" + : "+r"(_base) : "r"(count), "r"(step) : "ctr"); + + asm volatile( + " mtctr %1;" + "0: icbi 0,%0;" + " add %0, %0, %2;" + " bdnz 0b;" + " isync" + : "+r"(base) : "r"(count), "r"(step) : "ctr"); +} + +// emitter ABI stuff +#define emith_pool_check() /**/ +#define emith_pool_commit(j) /**/ +#define emith_insn_ptr() ((u8 *)tcache_ptr) +#define emith_flush() /**/ +#define emith_update_cache() /**/ +#define emith_rw_offs_max() 0x7fff + +// SH2 drc specific +#define STACK_EXTRA ((8+6)*PTR_SIZE) // Param, ABI (LR,CR,FP etc) save areas +#define emith_sh2_drc_entry() do { \ + int _c, _z = PTR_SIZE; u32 _m = 0xffffc000; /* r14-r31 */ \ + if (__builtin_parity(_m) == 1) _m |= 0x1; /* ABI align for SP is 16 */ \ + int _s = count_bits(_m) * _z, _o = STACK_EXTRA; \ + EMIT(PPC_STPU_IMM(SP, SP, -_s-STACK_EXTRA)); \ + EMIT(PPC_MFSP_REG(AT, LR)); \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_write_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ + emith_write_r_r_offs_ptr(AT, SP, _o + _z); \ +} while (0) +#define emith_sh2_drc_exit() do { \ + int _c, _z = PTR_SIZE; u32 _m = 0xffffc000; \ + if (__builtin_parity(_m) == 1) _m |= 0x1; \ + int _s = count_bits(_m) * _z, _o = STACK_EXTRA; \ + emith_read_r_r_offs_ptr(AT, SP, _o+_s + _z); \ + EMIT(PPC_MTSP_REG(AT, LR)); \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ + emith_add_r_r_ptr_imm(SP, SP, _s+STACK_EXTRA); \ + emith_ret(); \ +} while (0) + +// NB: assumes a is in arg0, tab, func and mask are 
temp +#define emith_sh2_rcall(a, tab, func, mask) do { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + emith_add_r_r_r_lsl_ptr(tab, tab, mask, PTR_SCALE+1); \ + emith_read_r_r_offs_ptr(func, tab, 0); \ + emith_read_r_r_offs(mask, tab, PTR_SIZE); \ + EMIT(PPC_BFXP_IMM(FC, func, 0, 1)); \ + emith_add_r_r_ptr(func, func); \ + emith_cmp_ra = emith_cmp_rb = -1; \ +} while (0) + +// NB: assumes a, val are in arg0 and arg1, tab and func are temp +#define emith_sh2_wcall(a, val, tab, func) do { \ + emith_lsr(func, a, SH2_WRITE_SHIFT); \ + emith_lsl(func, func, PTR_SCALE); \ + emith_read_r_r_r_ptr(CR, tab, func); \ + emith_move_r_r_ptr(5, CONTEXT_REG); /* arg2 */ \ + emith_abijump_reg(CR); \ +} while (0) + +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_cmp_r_imm(sr, 0); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +/* + * T = !carry(Rn = (Rn << 1) | T) + * if Q + * C = carry(Rn += Rm) + * else + * C = carry(Rn -= Rm) + * 
T ^= C + */ +#define emith_sh2_div1_step(rn, rm, sr) do { \ + int t_ = rcache_get_tmp(); \ + emith_and_r_r_imm(AT, sr, T); \ + emith_lsr(FC, rn, 31); /*Rn = (Rn<<1)+T*/ \ + emith_lsl(t_, rn, 1); \ + emith_or_r_r(t_, AT); \ + emith_or_r_imm(sr, T); /* T = !carry */ \ + emith_eor_r_r(sr, FC); \ + emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ + EMITH_JMP3_START(DCOND_EQ); \ + emith_add_r_r_r(rn, t_, rm); \ + EMIT(PPC_CMPLW_REG(rn, t_)); \ + EMITH_JMP3_MID(DCOND_EQ); \ + emith_sub_r_r_r(rn, t_, rm); \ + EMIT(PPC_CMPLW_REG(t_, rn)); \ + EMITH_JMP3_END(); \ + EMIT(PPC_MFCR_REG(FC)); \ + EMIT(PPC_BFXW_IMM(FC, FC, 0, 1)); \ + emith_eor_r_r(sr, FC); /* T ^= carry */ \ + rcache_free_tmp(t_); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \ + emith_tst_r_r(rn, rn); /* (need only N and Z flags) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. 
rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_lsr(rn, ml, 31); \ + emith_add_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + emith_tst_r_r(rn, rn); /* (need only N and Z flags) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +#define emith_write_sr(sr, srcr) \ + EMIT(PPC_BFIW_IMM(sr, srcr, 22, 10)) + +#define emith_carry_to_t(sr, is_sub) \ + EMIT(PPC_BFIW_IMM(sr, FC, 32-__builtin_ffs(T), 1)) + +#define emith_t_to_carry(sr, is_sub) \ + emith_and_r_r_imm(FC, sr, 1) + +#define emith_tpop_carry(sr, is_sub) do { \ + emith_and_r_r_imm(FC, sr, 1); \ + emith_eor_r_r(sr, FC); \ +} while (0) + +#define emith_tpush_carry(sr, is_sub) \ + emith_or_r_r(sr, FC) + +#ifdef T +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +// T bit handling +static void emith_set_t_cond(int sr, int cond) +{ + int b; + + // catch never and always cases + if ((b = emith_cond_check(cond)) < 0) + return; + else if (b == BXX) { + emith_or_r_imm(sr, T); + return; + } + + // extract bit from CR and insert into T + EMIT(PPC_MFCR_REG(AT)); + EMIT(PPC_BFXW_IMM(AT, AT, (b&7), 1)); + if (!(b & 0x40)) 
EMIT(PPC_XOR_IMM(AT, AT, 1)); + EMIT(PPC_BFIW_IMM(sr, AT, 32-__builtin_ffs(T), 1)); +} + +#define emith_clr_t_cond(sr) ((void)sr) + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; +} +#endif diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c new file mode 100644 index 00000000..5783b759 --- /dev/null +++ b/cpu/drc/emit_riscv.c @@ -0,0 +1,1701 @@ +/* + * Basic macros to emit RISC-V RV64IM instructions and some utils + * Copyright (C) 2019-2024 irixxxx + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + * + * TODO: support for compressed instructions + */ +#define HOST_REGS 32 + +// RISC-V ABI: params: x10-x17, return: x10-x11, temp: x1(ra),x5-x7,x28-x31 +// saved: x8(fp),x9,x18-x27, reserved: x0(zero), x4(tp), x3(gp), x2(sp) +// x28-x31(t3-t6) are used internally by the code emitter +#define RET_REG 10 // a0 +#define PARAM_REGS { 10, 11, 12, 13, 14, 15, 16, 17 } // a0-a7 +#define PRESERVED_REGS { 9, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 } // s1-s11 +#define TEMPORARY_REGS { 5, 6, 7 } // t0-t2 + +#define CONTEXT_REG 9 // s1 +#define STATIC_SH2_REGS { SHR_SR,27 , SHR_R(0),26 , SHR_R(1),25 } + +// registers usable for user code: r1-r25, others reserved or special +#define Z0 0 // zero register +#define GP 3 // global pointer +#define SP 2 // stack pointer +#define FP 8 // frame pointer +#define LR 1 // link register +// internally used by code emitter: +#define AT 31 // used to hold intermediate results +#define FNZ 30 // emulated processor flags: N (bit 31) ,Z (all bits) +#define FC 29 // emulated processor flags: C (bit 0), others 0 +#define FV 28 // emulated processor flags: Nt^Ns (bit 31). 
others x + +// All operations but ptr ops are using the lower 32 bits of the registers. +// The upper 32 bits always contain the sign extension from the lower 32 bits. + +// unified conditions; virtual, not corresponding to anything real on RISC-V +#define DCOND_EQ 0x0 +#define DCOND_NE 0x1 +#define DCOND_HS 0x2 +#define DCOND_LO 0x3 +#define DCOND_MI 0x4 +#define DCOND_PL 0x5 +#define DCOND_VS 0x6 +#define DCOND_VC 0x7 +#define DCOND_HI 0x8 +#define DCOND_LS 0x9 +#define DCOND_GE 0xa +#define DCOND_LT 0xb +#define DCOND_GT 0xc +#define DCOND_LE 0xd + +#define DCOND_CS DCOND_LO +#define DCOND_CC DCOND_HS + +// unified insn +#define R5_INSN(b25, b20, b15, b12, b7, op) \ + (((b25)<<25)|((b20)<<20)|((b15)<<15)|((b12)<<12)|((b7)<<7)|((op)<<0)) + +#define _ 0 //marker for "field unused" +#define _CB(v,l,s,d) ((((v)>>(s))&((1<<(l))-1))<<(d)) // copy l bits + +#define R5_R_INSN(op, f1, f2, rd, rs, rt) \ + R5_INSN(f2, rt, rs, f1, rd, op) +#define R5_I_INSN(op, f1, rd, rs, imm) \ + R5_INSN(_, _CB(imm,12,0,0), rs, f1, rd, op) +#define R5_S_INSN(op, f1, rt, rs, imm) \ + R5_INSN(_CB(imm,7,5,0), rt, rs, f1, _CB(imm,5,0,0), op) +#define R5_U_INSN(op, rd, imm) \ + R5_INSN(_,_,_, _CB(imm,20,12,0), rd, op) +// oy vey... 
R5 immediate encoding in branches is really unwieldy :-/ +#define R5_B_INSN(op, f1, rt, rs, imm) \ + R5_INSN(_CB(imm,1,12,6)|_CB(imm,6,5,0), rt, rs, f1, \ + _CB(imm,4,1,1)|_CB(imm,1,11,0), op) +#define R5_J_INSN(op, rd, imm) \ + R5_INSN(_CB(imm,1,20,6)|_CB(imm,6,5,0), _CB(imm,4,1,1)|_CB(imm,1,11,0),\ + _CB(imm,8,12,0), rd, op) + +// opcode +enum { OP_LUI=0x37, OP_AUIPC=0x17, OP_JAL=0x6f, // 20-bit immediate + OP_JALR=0x67, OP_BCOND=0x63, OP_LD=0x03, OP_ST=0x23, // 12-bit immediate + OP_IMM=0x13, OP_REG=0x33, OP_IMM32=0x1b, OP_REG32=0x3b }; +// func3 +enum { F1_ADD, F1_SL, F1_SLT, F1_SLTU, F1_XOR, F1_SR, F1_OR, F1_AND };// IMM/REG +enum { F1_MUL, F1_MULH, F1_MULHSU, F1_MULHU, F1_DIV, F1_DIVU, F1_REM, F1_REMU }; +enum { F1_BEQ, F1_BNE, F1_BLT=4, F1_BGE, F1_BLTU, F1_BGEU }; // BCOND +enum { F1_B, F1_H, F1_W, F1_D, F1_BU, F1_HU, F1_WU }; // LD/ST +// func7 +enum { F2_ALT=0x20, F2_MULDIV=0x01 }; + +#define R5_NOP R5_I_INSN(OP_IMM, F1_ADD, Z0, Z0, 0) // nop: ADDI r0, r0, #0 + +// arithmetic/logical + +// rd = rs OP rt +#define R5_ADD_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_ADD, _, rd, rs, rt) +#define R5_SUB_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_ADD, F2_ALT, rd, rs, rt) + +#define R5_NEG_REG(rd, rt) \ + R5_SUB_REG(rd, Z0, rt) + +#define R5_XOR_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_XOR, _, rd, rs, rt) +#define R5_OR_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_OR , _, rd, rs, rt) +#define R5_AND_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_AND, _, rd, rs, rt) + +// rd = rs SHIFT rt +#define R5_LSL_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_SL , _, rd, rs, rt) +#define R5_LSR_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_SR , _, rd, rs, rt) +#define R5_ASR_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_SR , F2_ALT, rd, rs, rt) + +// rd = (rs < rt) +#define R5_SLT_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_SLT, _, rd, rs, rt) +#define R5_SLTU_REG(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_SLTU,_, rd, rs, rt) + +// rd = rs OP imm12 +#define R5_ADD_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_ADD , 
rd, rs, imm12) + +#define R5_XOR_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_XOR , rd, rs, imm12) +#define R5_OR_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_OR , rd, rs, imm12) +#define R5_AND_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_AND , rd, rs, imm12) + +#define R5_MOV_REG(rd, rs) \ + R5_ADD_IMM(rd, rs, 0) +#define R5_MVN_REG(rd, rs) \ + R5_XOR_IMM(rd, rs, -1) + +// rd = (imm12 << (0|12)) +#define R5_MOV_IMM(rd, imm12) \ + R5_OR_IMM(rd, Z0, imm12) +#define R5_MOVT_IMM(rd, imm20) \ + R5_U_INSN(OP_LUI, rd, imm20) +#define R5_MOVA_IMM(rd, imm20) \ + R5_U_INSN(OP_AUIPC, rd, imm20) + +// rd = rs SHIFT imm5/imm6 +#define R5_LSL_IMM(rd, rs, bits) \ + R5_R_INSN(OP_IMM, F1_SL , _, rd, rs, bits) +#define R5_LSR_IMM(rd, rs, bits) \ + R5_R_INSN(OP_IMM, F1_SR , _, rd, rs, bits) +#define R5_ASR_IMM(rd, rs, bits) \ + R5_R_INSN(OP_IMM, F1_SR , F2_ALT, rd, rs, bits) + +// rd = (rs < imm12) +#define R5_SLT_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_SLT , rd, rs, imm12) +#define R5_SLTU_IMM(rd, rs, imm12) \ + R5_I_INSN(OP_IMM, F1_SLTU, rd, rs, imm12) + +// multiplication + +#define R5_MULHU(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_MULHU, F2_MULDIV, rd, rs, rt) +#define R5_MULHS(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_MULH, F2_MULDIV, rd, rs, rt) +#define R5_MUL(rd, rs, rt) \ + R5_R_INSN(OP_REG, F1_MUL, F2_MULDIV, rd, rs, rt) + +// branching + +#define R5_J(imm20) \ + R5_J_INSN(OP_JAL, Z0, imm20) +#define R5_JAL(rd, imm20) \ + R5_J_INSN(OP_JAL, rd, imm20) +#define R5_JR(rs, offs12) \ + R5_I_INSN(OP_JALR, _, Z0, rs, offs12) +#define R5_JALR(rd, rs, offs12) \ + R5_I_INSN(OP_JALR, _, rd, rs, offs12) + +// conditional branches; no condition code, these compare rs against rt +#define R5_BCOND(cond, rs, rt, offs13) \ + R5_B_INSN(OP_BCOND, cond, rt, rs, offs13) +#define R5_BCONDZ(cond, rs, offs13) \ + R5_B_INSN(OP_BCOND, cond, Z0, rs, offs13) +#define R5_B(offs13) \ + R5_BCOND(F1_BEQ, Z0, Z0, offs13) + +// load/store indexed base + +#define R5_LW(rd, rs, offs12) \ + R5_I_INSN(OP_LD, F1_W, 
rd, rs, offs12) +#define R5_LH(rd, rs, offs12) \ + R5_I_INSN(OP_LD, F1_H, rd, rs, offs12) +#define R5_LB(rd, rs, offs12) \ + R5_I_INSN(OP_LD, F1_B, rd, rs, offs12) +#define R5_LHU(rd, rs, offs12) \ + R5_I_INSN(OP_LD, F1_HU, rd, rs, offs12) +#define R5_LBU(rd, rs, offs12) \ + R5_I_INSN(OP_LD, F1_BU, rd, rs, offs12) + +#define R5_SW(rt, rs, offs12) \ + R5_S_INSN(OP_ST, F1_W, rt, rs, offs12) +#define R5_SH(rt, rs, offs12) \ + R5_S_INSN(OP_ST, F1_H, rt, rs, offs12) +#define R5_SB(rt, rs, offs12) \ + R5_S_INSN(OP_ST, F1_B, rt, rs, offs12) + +// pointer operations + +#if __riscv_xlen == 64 +#define R5_OP32 (OP_REG32 ^ OP_REG) +#define F1_P F1_D +#define PTR_SCALE 3 + +// NB: must split 64 bit result into 2 32 bit registers +#define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \ + EMIT(R5_LSL_IMM(AT, s1, 32)); \ + EMIT(R5_LSL_IMM(dhi, s2, 32)); \ + EMIT(R5_MULHU(dlo, AT, dhi)); \ + EMIT(R5_ASR_IMM(dhi, dlo, 32)); \ + EMIT(R5_ADDW_IMM(dlo, dlo, 0)); \ +} while (0) + +#define EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) do { \ + EMIT(R5_MUL(dlo, s1, s2)); \ + EMIT(R5_ASR_IMM(dhi, dlo, 32)); \ + EMIT(R5_ADDW_IMM(dlo, dlo, 0)); \ +} while (0) + +#else +#define R5_OP32 0 +#define F1_P F1_W +#define PTR_SCALE 2 + +#define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \ + int at = (dhi == s1 || dhi == s2 ? AT : dhi); \ + EMIT(R5_MULHU(at, s1, s2)); \ + EMIT(R5_MUL(dlo, s1, s2)); \ + if (at != dhi) emith_move_r_r(dhi, at); \ +} while (0) + +#define EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) do { \ + int at = (dhi == s1 || dhi == s2 ? AT : dhi); \ + EMIT(R5_MULHS(at, s1, s2)); \ + EMIT(R5_MUL(dlo, s1, s2)); \ + if (at != dhi) emith_move_r_r(dhi, at); \ +} while (0) +#endif + +#define PTR_SIZE (1<>1 since the lowest bit inverts the cond */ \ + unsigned _mv = BITMASK3(DCOND_VS>>1,DCOND_GE>>1,DCOND_GT>>1); \ + unsigned _mc = _mv | BITMASK2(DCOND_HS>>1,DCOND_HI>>1); \ + emith_flg_hint = (_mv & BITMASK1(cond >> 1) ? _FHV : 0); \ + emith_flg_hint |= (_mc & BITMASK1(cond >> 1) ? 
_FHC : 0); \ +} while (0) + +// store minimal cc information: rd, rt^rs, carry +// NB: the result *must* first go to FNZ, in case rd == rs or rd == rt. +// NB: for adcf and sbcf, carry-in must be dealt with separately (see there) +static void emith_set_arith_flags(int rd, int rs, int rt, s32 imm, int sub) +{ + if (emith_flg_hint & _FHC) { + if (sub) // C = sub:rt Z0) // Nt^Ns in FV, bit 31 + EMIT(R5_XOR_REG(FV, rs, rt)); + else if (rt == Z0 || imm == 0) + emith_flg_noV = 1; // imm #0 can't overflow + else if ((imm < 0) == !sub) + EMIT(R5_XOR_IMM(FV, rs, -1)); + else if ((imm > 0) == !sub) + EMIT(R5_XOR_REG(FV, rs, Z0)); + } + // full V = Nd^Nt^Ns^C calculation is deferred until really needed + + if (rd && rd != FNZ) + EMIT(R5_MOV_REG(rd, FNZ)); // N,Z via result value in FNZ + emith_cmp_rs = emith_cmp_rt = -1; +} + +// since R5 has less-than and compare-branch insns, handle cmp separately by +// storing the involved regs for later use in one of those R5 insns. +// This works for all conditions but VC/VS, but this is fortunately never used. 
+static void emith_set_compare_flags(int rs, int rt, s32 imm) +{ + emith_cmp_rt = rt; + emith_cmp_rs = rs; + emith_cmp_imm = imm; +} + +// data processing, register +#define emith_move_r_r_ptr(d, s) \ + EMIT(R5_MOV_REG(d, s)) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) + +#define emith_move_r_r(d, s) \ + emith_move_r_r_ptr(d, s) +#define emith_move_r_r_c(cond, d, s) \ + emith_move_r_r(d, s) + +#define emith_mvn_r_r(d, s) \ + EMIT(R5_MVN_REG(d, s)) + +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSL_IMM(AT, s2, simm)); \ + EMIT(R5_ADD_REG(d, s1, AT)); \ + } else EMIT(R5_ADD_REG(d, s1, s2)); \ +} while (0) +#define emith_add_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_ADDW_REG(d, s1, AT)); \ + } else EMIT(R5_ADDW_REG(d, s1, s2)); \ +} while (0) + +#define emith_add_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSRW_IMM(AT, s2, simm)); \ + EMIT(R5_ADDW_REG(d, s1, AT)); \ + } else EMIT(R5_ADDW_REG(d, s1, s2)); \ +} while (0) + +#define emith_addf_r_r_r_lsl_ptr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSL_IMM(AT, s2, simm)); \ + EMIT(R5_ADD_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(R5_ADD_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) +#define emith_addf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_ADDW_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(R5_ADDW_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define emith_addf_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSRW_IMM(AT, s2, simm)); \ + EMIT(R5_ADDW_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 0); \ + } else { \ + EMIT(R5_ADDW_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + } \ +} while (0) + +#define 
emith_sub_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_SUBW_REG(d, s1, AT)); \ + } else EMIT(R5_SUBW_REG(d, s1, s2)); \ +} while (0) + +#define emith_subf_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_SUBW_REG(FNZ, s1, AT)); \ + emith_set_arith_flags(d, s1, AT, 0, 1); \ + } else { \ + EMIT(R5_SUBW_REG(FNZ, s1, s2)); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + } \ +} while (0) + +#define emith_or_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_OR_REG(d, s1, AT)); \ + } else EMIT(R5_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSRW_IMM(AT, s2, simm)); \ + EMIT(R5_OR_REG(d, s1, AT)); \ + } else EMIT(R5_OR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_XOR_REG(d, s1, AT)); \ + } else EMIT(R5_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_eor_r_r_r_lsr(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSRW_IMM(AT, s2, simm)); \ + EMIT(R5_XOR_REG(d, s1, AT)); \ + } else EMIT(R5_XOR_REG(d, s1, s2)); \ +} while (0) + +#define emith_and_r_r_r_lsl(d, s1, s2, simm) do { \ + if (simm) { \ + EMIT(R5_LSLW_IMM(AT, s2, simm)); \ + EMIT(R5_AND_REG(d, s1, AT)); \ + } else EMIT(R5_AND_REG(d, s1, s2)); \ +} while (0) + +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_eor_r_r_lsl(d, s, lslimm) \ + emith_eor_r_r_r_lsl(d, d, s, lslimm) +#define emith_eor_r_r_lsr(d, s, lsrimm) \ + emith_eor_r_r_r_lsr(d, d, s, lsrimm) + +#define emith_add_r_r_r_ptr(d, s1, s2) \ + emith_add_r_r_r_lsl_ptr(d, s1, s2, 0) +#define emith_add_r_r_r(d, s1, s2) \ + emith_add_r_r_r_lsl(d, s1, s2, 0) + +#define emith_addf_r_r_r_ptr(d, s1, s2) \ + 
emith_addf_r_r_r_lsl_ptr(d, s1, s2, 0) +#define emith_addf_r_r_r(d, s1, s2) \ + emith_addf_r_r_r_ptr(d, s1, s2) + +#define emith_sub_r_r_r(d, s1, s2) \ + emith_sub_r_r_r_lsl(d, s1, s2, 0) + +#define emith_subf_r_r_r(d, s1, s2) \ + emith_subf_r_r_r_lsl(d, s1, s2, 0) + +#define emith_or_r_r_r(d, s1, s2) \ + emith_or_r_r_r_lsl(d, s1, s2, 0) + +#define emith_eor_r_r_r(d, s1, s2) \ + emith_eor_r_r_r_lsl(d, s1, s2, 0) + +#define emith_and_r_r_r(d, s1, s2) \ + emith_and_r_r_r_lsl(d, s1, s2, 0) + +#define emith_add_r_r_ptr(d, s) \ + emith_add_r_r_r_lsl_ptr(d, d, s, 0) +#define emith_add_r_r(d, s) \ + emith_add_r_r_r(d, d, s) + +#define emith_sub_r_r(d, s) \ + emith_sub_r_r_r(d, d, s) + +#define emith_neg_r_r(d, s) \ + EMIT(R5_NEGW_REG(d, s)) + +#define emith_adc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_add_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_sbc_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(AT, s2, FC); \ + emith_sub_r_r_r(d, s1, AT); \ +} while (0) + +#define emith_adc_r_r(d, s) \ + emith_adc_r_r_r(d, d, s) + +#define emith_negc_r_r(d, s) \ + emith_sbc_r_r_r(d, Z0, s) + +// NB: the incoming carry Cin can cause Cout if s2+Cin=0 (or s1+Cin=0 FWIW) +// moreover, if s2+Cin=0 caused Cout, s1+s2+Cin=s1+0 can't cause another Cout +#define emith_adcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT(R5_SLTU_REG(AT, FNZ, FC)); \ + emith_add_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 0); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_sbcf_r_r_r(d, s1, s2) do { \ + emith_add_r_r_r(FNZ, s2, FC); \ + EMIT(R5_SLTU_REG(AT, FNZ, FC)); \ + emith_sub_r_r_r(FNZ, s1, FNZ); \ + emith_set_arith_flags(d, s1, s2, 0, 1); \ + emith_or_r_r(FC, AT); \ +} while (0) + +#define emith_and_r_r(d, s) \ + emith_and_r_r_r(d, d, s) +#define emith_and_r_r_c(cond, d, s) \ + emith_and_r_r(d, s) + +#define emith_or_r_r(d, s) \ + emith_or_r_r_r(d, d, s) + +#define emith_eor_r_r(d, s) \ + emith_eor_r_r_r(d, d, s) + +#define 
emith_tst_r_r_ptr(d, s) do { \ + if (d != s) { \ + emith_and_r_r_r(FNZ, d, s); \ + emith_cmp_rs = emith_cmp_rt = -1; \ + } else emith_cmp_rs = s, emith_cmp_rt = Z0; \ +} while (0) +#define emith_tst_r_r(d, s) \ + emith_tst_r_r_ptr(d, s) + +#define emith_teq_r_r(d, s) do { \ + emith_eor_r_r_r(FNZ, d, s); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_cmp_r_r(d, s) \ + emith_set_compare_flags(d, s, 0) +// emith_subf_r_r_r(FNZ, d, s) + +#define emith_addf_r_r(d, s) \ + emith_addf_r_r_r(d, d, s) + +#define emith_subf_r_r(d, s) \ + emith_subf_r_r_r(d, d, s) + +#define emith_adcf_r_r(d, s) \ + emith_adcf_r_r_r(d, d, s) + +#define emith_sbcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, d, s) + +#define emith_negcf_r_r(d, s) \ + emith_sbcf_r_r_r(d, Z0, s) + + +// move immediate +#define MAX_HOST_LITERALS 32 // pool must be smaller than 4 KB +static uintptr_t literal_pool[MAX_HOST_LITERALS]; +static u32 *literal_insn[MAX_HOST_LITERALS]; +static int literal_pindex, literal_iindex; + +static inline int emith_pool_literal(uintptr_t imm) +{ + int idx = literal_pindex - 8; // max look behind in pool + // see if one of the last literals was the same + for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++) + if (imm == literal_pool[idx]) + break; + if (idx == literal_pindex) // store new literal + literal_pool[literal_pindex++] = imm; + return idx; +} + +static void emith_pool_commit(int jumpover) +{ + int i, sz = literal_pindex * sizeof(uintptr_t); + u8 *pool = (u8 *)tcache_ptr; + + // nothing to commit if pool is empty + if (sz == 0) + return; + // align pool to pointer size + if (jumpover) + pool += sizeof(u32); + i = (uintptr_t)pool & (sizeof(void *)-1); + pool += (i ? 
sizeof(void *)-i : 0);
+    // need branch over pool if not at block end
+    if (jumpover)
+        EMIT(R5_B(sz + (pool-(u8 *)tcache_ptr)));
+    // safety check - pool must be after insns and reachable
+    // (0x7ff is the largest positive value of a signed 12 bit offset)
+    if ((u32)(pool - (u8 *)literal_insn[0] + 8) > 0x7ff) {
+        elprintf(EL_STATUS|EL_SVP|EL_ANOMALY,
+            "pool offset out of range");
+        exit(1);
+    }
+    // copy pool and adjust addresses in insns accessing the pool
+    memcpy(pool, literal_pool, sz);
+    for (i = 0; i < literal_iindex; i++) {
+        // the load offset lives in bits 20-31 of the insn, hence the shift
+        *literal_insn[i] += ((u8 *)pool - (u8 *)literal_insn[i]) << 20;
+    }
+    // count pool constants as insns for statistics
+    for (i = 0; i < literal_pindex * sizeof(uintptr_t)/sizeof(u32); i++)
+        COUNT_OP;
+
+    // continue emitting behind the pool, and start over with an empty pool
+    tcache_ptr = (void *)((u8 *)pool + sz);
+    literal_pindex = literal_iindex = 0;
+}
+
+// Commit the literal pool (with a branch over it) if more than
+// MAX_HOST_LITERALS-4 pool accesses are pending, or if the pool is not empty
+// and the first pending access is more than 0x700 bytes behind tcache_ptr.
+static void emith_pool_check(void)
+{
+    // check if pool must be committed
+    if (literal_iindex > MAX_HOST_LITERALS-4 || (literal_pindex &&
+        (u8 *)tcache_ptr - (u8 *)literal_insn[0] > 0x700))
+        // pool full, or displacement is approaching the limit
+        emith_pool_commit(1);
+}
+
+// Load a 32 bit immediate into register r: R5_MOVT_IMM for the upper part
+// followed by R5_ADDW_IMM for the low 12 bits if those are non-zero, or a
+// single R5_ADDW_IMM on Z0 if the upper part is zero.
+static void emith_move_imm(int r, uintptr_t imm)
+{
+    u32 lui = imm + _CB(imm,1,11,12); // compensate for ADDI sign extension
+    if (lui >> 12) {
+        EMIT(R5_MOVT_IMM(r, lui));
+        if (imm & 0xfff)
+            EMIT(R5_ADDW_IMM(r, r, imm));
+    } else
+        EMIT(R5_ADDW_IMM(r, Z0, imm));
+}
+
+// Load a pointer sized immediate into register r. With 64 bit registers, a
+// value that is not a sign extended 32 bit number is built PC relative if its
+// distance from tcache_ptr fits into 32 bits, and is loaded from the literal
+// pool otherwise. Everything else is handled by emith_move_imm.
+static void emith_move_ptr_imm(int r, uintptr_t imm)
+{
+#if __riscv_xlen == 64
+    uintptr_t offs = (u8 *)imm - (u8 *)tcache_ptr;
+    if ((s32)imm != imm && (s32)offs == offs) {
+        // PC relative
+        EMIT(R5_MOVA_IMM(r, offs + _CB(offs,1,11,12)));
+        if (offs & 0xfff)
+            EMIT(R5_ADD_IMM(r, r, offs));
+    } else if ((s32)imm != imm) {
+        // via literal pool
+        int idx;
+        // make room first if the table of pending pool accesses is full
+        if (literal_iindex >= MAX_HOST_LITERALS)
+            emith_pool_commit(1);
+        idx = emith_pool_literal(imm);
+        EMIT(R5_MOVA_IMM(AT, 0)); // loads PC of MOVA insn...
+ 4 in LD + literal_insn[literal_iindex++] = (u32 *)tcache_ptr; + EMIT(R5_I_INSN(OP_LD, F1_P, r, AT, idx*sizeof(uintptr_t) + 4)); + } else +#endif + emith_move_imm(r, imm); +} + +#define emith_move_r_ptr_imm(r, imm) \ + emith_move_ptr_imm(r, (uintptr_t)(imm)) + +#define emith_move_r_imm(r, imm) \ + emith_move_imm(r, (u32)(imm)) +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm) + +#define emith_move_r_imm_s8_patchable(r, imm) \ + EMIT(R5_ADD_IMM(r, Z0, (s8)(imm))) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u32 *ptr_ = (u32 *)ptr; \ + EMIT_PTR(ptr_, (*ptr_ & 0x000fffff) | ((u16)(s8)(imm)<<20)); \ +} while (0) + +// arithmetic/logical, immediate - R5 always takes a signed 12 bit immediate + +static void emith_op_imm(int f1, int rd, int rs, u32 imm) +{ + int op32 = (f1 == F1_ADD ? R5_OP32 : 0); + if ((imm + _CB(imm,1,11,12)) >> 12) { + emith_move_r_imm(AT, imm); + EMIT(R5_R_INSN(OP_REG^op32, f1&7,_, rd, rs, AT)); + } else if (imm || f1 == F1_AND || rd != rs) + EMIT(R5_I_INSN(OP_IMM^op32, f1&7, rd, rs, imm)); +} + +// arithmetic, immediate - can only be ADDI, since SUBI doesn't exist +#define emith_add_r_imm(r, imm) \ + emith_add_r_r_imm(r, r, imm) +#define emith_add_r_imm_c(cond, r, imm) \ + emith_add_r_imm(r, imm) + +// in-place variant: r is both destination and source of the 3 operand form +#define emith_addf_r_imm(r, imm) \ + emith_addf_r_r_imm(r, r, imm) + +#define emith_sub_r_imm(r, imm) \ + emith_sub_r_r_imm(r, r, imm) +#define emith_sub_r_imm_c(cond, r, imm) \ + emith_sub_r_imm(r, imm) + +#define emith_subf_r_imm(r, imm) \ + emith_subf_r_r_imm(r, r, imm) + +// NB: no trailing ';' in the macro body, the caller supplies it +#define emith_adc_r_imm(r, imm) \ + emith_adc_r_r_imm(r, r, imm) + +#define emith_adcf_r_imm(r, imm) \ + emith_adcf_r_r_imm(r, r, imm) + +#define emith_cmp_r_imm(r, imm) \ + emith_set_compare_flags(r, -1, imm) +// emith_subf_r_r_imm(FNZ, r, imm) + +#define emith_add_r_r_ptr_imm(d, s, imm) \ + emith_op_imm(F1_ADD|F2_ALT, d, s, imm) + +#define emith_add_r_r_imm(d, s, imm) \ + emith_op_imm(F1_ADD, d, s, imm) + +#define emith_addf_r_r_imm(d, s, imm) do { \ + 
emith_add_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, -1, imm, 0); \ +} while (0) + +#define emith_adc_r_r_imm(d, s, imm) do { \ + emith_add_r_r_r(AT, s, FC); \ + emith_add_r_r_imm(d, AT, imm); \ +} while (0) + +#define emith_adcf_r_r_imm(d, s, imm) do { \ + if (imm == 0) { \ + emith_add_r_r_r(FNZ, s, FC); \ + emith_set_arith_flags(d, s, -1, 1, 0); \ + } else { \ + emith_add_r_r_r(FNZ, s, FC); \ + EMIT(R5_SLTU_REG(AT, FNZ, FC)); \ + emith_add_r_r_imm(FNZ, FNZ, imm); \ + emith_set_arith_flags(d, s, -1, imm, 0); \ + emith_or_r_r(FC, AT); \ + } \ +} while (0) + +// NB: no SUBI in R5, since ADDI takes a signed imm +#define emith_sub_r_r_imm(d, s, imm) \ + emith_add_r_r_imm(d, s, -(imm)) +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_sub_r_r_imm(d, s, imm) + +#define emith_subf_r_r_imm(d, s, imm) do { \ + emith_sub_r_r_imm(FNZ, s, imm); \ + emith_set_arith_flags(d, s, -1, imm, 1); \ +} while (0) + +// logical, immediate +#define emith_and_r_imm(r, imm) \ + emith_op_imm(F1_AND, r, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_op_imm(F1_OR, r, r, imm) +#define emith_or_r_imm_c(cond, r, imm) \ + emith_or_r_imm(r, imm) + +#define emith_eor_r_imm_ptr(r, imm) \ + emith_op_imm(F1_XOR, r, r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_ptr(r, imm) + +#define emith_eor_r_imm(r, imm) \ + emith_eor_r_imm_ptr(r, imm) +#define emith_eor_r_imm_c(cond, r, imm) \ + emith_eor_r_imm(r, imm) + +/* NB: BIC #imm not available in R5; use AND #~imm instead */ +#define emith_bic_r_imm(r, imm) \ + emith_op_imm(F1_AND, r, r, ~(imm)) +#define emith_bic_r_imm_c(cond, r, imm) \ + emith_bic_r_imm(r, imm) + +#define emith_tst_r_imm(r, imm) do { \ + emith_op_imm(F1_AND, FNZ, r, imm); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_tst_r_imm(r, imm) + +#define emith_and_r_r_imm(d, s, imm) \ + emith_op_imm(F1_AND, d, s, imm) + +#define emith_or_r_r_imm(d, s, imm) \ + emith_op_imm(F1_OR, d, s, imm) + 
+#define emith_eor_r_r_imm(d, s, imm) \ + emith_op_imm(F1_XOR, d, s, imm) + +// shift +#define emith_lsl(d, s, cnt) \ + EMIT(R5_LSLW_IMM(d, s, cnt)) + +#define emith_lsr(d, s, cnt) \ + EMIT(R5_LSRW_IMM(d, s, cnt)) + +#define emith_asr(d, s, cnt) \ + EMIT(R5_ASRW_IMM(d, s, cnt)) + +#define emith_ror(d, s, cnt) do { \ + EMIT(R5_LSLW_IMM(AT, s, 32-(cnt))); \ + EMIT(R5_LSRW_IMM(d, s, cnt)); \ + EMIT(R5_OR_REG(d, d, AT)); \ +} while (0) +#define emith_ror_c(cond, d, s, cnt) \ + emith_ror(d, s, cnt) + +#define emith_rol(d, s, cnt) do { \ + EMIT(R5_LSRW_IMM(AT, s, 32-(cnt))); \ + EMIT(R5_LSLW_IMM(d, s, cnt)); \ + EMIT(R5_OR_REG(d, d, AT)); \ +} while (0) + +#define emith_rorc(d) do { \ + emith_lsr(d, d, 1); \ + emith_lsl(AT, FC, 31); \ + emith_or_r_r(d, AT); \ +} while (0) + +#define emith_rolc(d) do { \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ +} while (0) + +// NB: all flag setting shifts make V undefined +#define emith_lslf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsl(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_lsr(FC, _s, 31); \ + emith_lsl(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_lsrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_lsr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_lsr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_asrf(d, s, cnt) do { \ + int _s = s; \ + if ((cnt) > 1) { \ + emith_asr(d, s, cnt-1); \ + _s = d; \ + } \ + if ((cnt) > 0) { \ + emith_and_r_r_imm(FC, _s, 1); \ + emith_asr(d, _s, 1); \ + } \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rolf(d, s, cnt) do { \ + emith_rol(d, s, cnt); \ + emith_and_r_r_imm(FC, d, 1); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rorf(d, s, cnt) do { \ + 
emith_ror(d, s, cnt); \ + emith_lsr(FC, d, 31); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rolcf(d) do { \ + emith_lsr(AT, d, 31); \ + emith_lsl(d, d, 1); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +#define emith_rorcf(d) do { \ + emith_and_r_r_imm(AT, d, 1); \ + emith_lsr(d, d, 1); \ + emith_lsl(FC, FC, 31); \ + emith_or_r_r(d, FC); \ + emith_move_r_r(FC, AT); \ + emith_move_r_r(FNZ, d); \ + emith_cmp_rs = emith_cmp_rt = -1; \ +} while (0) + +// signed/unsigned extend + +#define emith_clear_msb(d, s, count) /* bits to clear */ do { \ + u32 t; \ + if ((count) >= 21) { \ + t = (count) - 21; \ + t = 0x7ff >> t; \ + emith_and_r_r_imm(d, s, t); \ + } else { \ + emith_lsl(d, s, count); \ + emith_lsr(d, d, count); \ + } \ +} while (0) +#define emith_clear_msb_c(cond, d, s, count) \ + emith_clear_msb(d, s, count) + +#define emith_sext(d, s, count) /* bits to keep */ do { \ + emith_lsl(d, s, 32-(count)); \ + emith_asr(d, d, 32-(count)); \ +} while (0) + +// multiply Rd = Rn*Rm (+ Ra) + +#define emith_mul(d, s1, s2) \ + EMIT(R5_MULW(d, s1, s2)) \ + +#define emith_mul_u64(dlo, dhi, s1, s2) \ + EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) + +#define emith_mul_s64(dlo, dhi, s1, s2) \ + EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) + +#define emith_mula_s64(dlo, dhi, s1, s2) do { \ + int t_ = rcache_get_tmp(); \ + EMIT_R5_MULLS_REG(t_, AT, s1, s2); \ + emith_add_r_r(dhi, AT); \ + emith_add_r_r(dlo, t_); \ + EMIT(R5_SLTU_REG(AT, dlo, t_)); \ + emith_add_r_r(dhi, AT); \ + rcache_free_tmp(t_); \ +} while (0) +#define emith_mula_s64_c(cond, dlo, dhi, s1, s2) \ + emith_mula_s64(dlo, dhi, s1, s2) + +// load/store. 
offs has 12 bits signed, hence larger offs may use a temp +static void emith_ld_offs(int sz, int rd, int rs, int o12) +{ + if (o12 >= -0x800 && o12 < 0x800) { + EMIT(R5_I_INSN(OP_LD, sz, rd, rs, o12)); + } else { + EMIT(R5_MOVT_IMM(AT, o12 + _CB(o12,1,11,12))); \ + EMIT(R5_ADD_REG(AT, rs, AT)); \ + EMIT(R5_I_INSN(OP_LD, sz, rd, AT, o12)); + } +} + +#define emith_read_r_r_offs_ptr(r, rs, offs) \ + emith_ld_offs(F1_P, r, rs, offs) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) + +#define emith_read_r_r_offs(r, rs, offs) \ + emith_ld_offs(F1_W, r, rs, offs) +#define emith_read_r_r_offs_c(cond, r, rs, offs) \ + emith_read_r_r_offs(r, rs, offs) + +#define emith_read_r_r_r_ptr(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + emith_ld_offs(F1_P, r, AT, 0); \ +} while (0) +#define emith_read_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + emith_ld_offs(F1_W, r, AT, 0); \ +} while (0) +#define emith_read_r_r_r_c(cond, r, rs, rm) \ + emith_read_r_r_r(r, rs, rm) + +#define emith_read8_r_r_offs(r, rs, offs) \ + emith_ld_offs(F1_BU, r, rs, offs) +#define emith_read8_r_r_offs_c(cond, r, rs, offs) \ + emith_read8_r_r_offs(r, rs, offs) + +#define emith_read8_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + emith_ld_offs(F1_BU, r, AT, 0); \ +} while (0) +#define emith_read8_r_r_r_c(cond, r, rs, rm) \ + emith_read8_r_r_r(r, rs, rm) + +#define emith_read16_r_r_offs(r, rs, offs) \ + emith_ld_offs(F1_HU, r, rs, offs) +#define emith_read16_r_r_offs_c(cond, r, rs, offs) \ + emith_read16_r_r_offs(r, rs, offs) + +#define emith_read16_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + emith_ld_offs(F1_HU, r, AT, 0); \ +} while (0) +#define emith_read16_r_r_r_c(cond, r, rs, rm) \ + emith_read16_r_r_r(r, rs, rm) + +#define emith_read8s_r_r_offs(r, rs, offs) \ + emith_ld_offs(F1_B, r, rs, offs) +#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \ + emith_read8s_r_r_offs(r, rs, offs) + +#define 
emith_read8s_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + emith_ld_offs(F1_B, r, AT, 0); \ +} while (0) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + emith_read8s_r_r_r(r, rs, rm) + +#define emith_read16s_r_r_offs(r, rs, offs) \ + emith_ld_offs(F1_H, r, rs, offs) +#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \ + emith_read16s_r_r_offs(r, rs, offs) + +#define emith_read16s_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + emith_ld_offs(F1_H, r, AT, 0); \ +} while (0) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + emith_read16s_r_r_r(r, rs, rm) + +// store rt at rs+o12; an offset outside the signed 12 bit range goes via AT +static void emith_st_offs(int sz, int rt, int rs, int o12) +{ + if (o12 >= -0x800 && o12 < 0x800) { + EMIT(R5_S_INSN(OP_ST, sz, rt, rs, o12)); + } else { + EMIT(R5_MOVT_IMM(AT, o12 + _CB(o12,1,11,12))); + EMIT(R5_ADD_REG(AT, rs, AT)); + EMIT(R5_S_INSN(OP_ST, sz, rt, AT, o12)); + } +} + +#define emith_write_r_r_offs_ptr(r, rs, offs) \ + emith_st_offs(F1_P, r, rs, offs) +#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_write_r_r_offs_ptr(r, rs, offs) + +#define emith_write_r_r_r_ptr(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + emith_st_offs(F1_P, r, AT, 0); \ +} while (0) +#define emith_write_r_r_r_ptr_c(cond, r, rs, rm) \ + emith_write_r_r_r_ptr(r, rs, rm) + +#define emith_write_r_r_offs(r, rs, offs) \ + emith_st_offs(F1_W, r, rs, offs) +#define emith_write_r_r_offs_c(cond, r, rs, offs) \ + emith_write_r_r_offs(r, rs, offs) + +#define emith_write_r_r_r(r, rs, rm) do { \ + emith_add_r_r_r_ptr(AT, rs, rm); \ + emith_st_offs(F1_W, r, AT, 0); \ +} while (0) +#define emith_write_r_r_r_c(cond, r, rs, rm) \ + emith_write_r_r_r(r, rs, rm) + +#define emith_ctx_read_ptr(r, offs) \ + emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) + +#define emith_ctx_read(r, offs) \ + emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) + +#define emith_ctx_write_ptr(r, offs) \ + emith_write_r_r_offs_ptr(r, CONTEXT_REG, 
offs) + +#define emith_ctx_write(r, offs) \ + emith_write_r_r_offs(r, CONTEXT_REG, offs) + +#define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_read(r_, offs_); \ +} while (0) + +#define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \ + int r_ = r, offs_ = offs, cnt_ = cnt; \ + for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ + emith_ctx_write(r_, offs_); \ +} while (0) + +// function call handling +#define emith_save_caller_regs(mask) do { \ + int _c, _z = PTR_SIZE; u32 _m = mask & 0x3fce0; /* x5-x7,x10-x17 */ \ + _c = count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ + int _s = count_bits(_m) * _z, _o = _s; \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= _z; if (_c) emith_write_r_r_offs_ptr(_c, SP, _o); } \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + int _c, _z = PTR_SIZE; u32 _m = mask & 0x3fce0; \ + _c = count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ + int _s = count_bits(_m) * _z, _o = 0; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ +} while (0) + +#define host_call(addr, args) \ + addr + +#define host_arg2reg(rd, arg) \ + rd = (arg+10) + +#define emith_pass_arg_r(arg, reg) \ + emith_move_r_r_ptr(arg, reg) + +#define emith_pass_arg_imm(arg, imm) \ + emith_move_r_ptr_imm(arg, imm) + +// branching +#define emith_invert_branch(cond) /* inverted conditional branch */ \ + ((cond) ^ 0x01) + +// evaluate the emulated condition, returns a register/branch type pair +static int emith_cmpr_check(int rs, int rt, int cond, int *r, int *s) +{ + int b = -1; + + // condition check for comparing 2 registers + switch (cond) { + case DCOND_EQ: *r = rs; *s = rt; b = F1_BEQ; 
break; + case DCOND_NE: *r = rs; *s = rt; b = F1_BNE; break; + case DCOND_LO: *r = rs, *s = rt, b = F1_BLTU; break; // s < t, u + case DCOND_HS: *r = rs, *s = rt, b = F1_BGEU; break; // s >= t, u + case DCOND_LS: *r = rt, *s = rs, b = F1_BGEU; break; // s <= t, u + case DCOND_HI: *r = rt, *s = rs, b = F1_BLTU; break; // s > t, u + case DCOND_LT: *r = rs, *s = rt, b = F1_BLT; break; // s < t + case DCOND_GE: *r = rs, *s = rt, b = F1_BGE; break; // s >= t + case DCOND_LE: *r = rt, *s = rs, b = F1_BGE; break; // s <= t + case DCOND_GT: *r = rt, *s = rs, b = F1_BLT; break; // s > t + } + + return b; +} + +static int emith_cmpi_check(int rs, s32 imm, int cond, int *r, int *s) +{ + int b = -1; + + // condition check for comparing register with immediate + if (imm == 0) return emith_cmpr_check(rs, Z0, cond, r, s); + + emith_move_r_imm(AT, imm); + switch (cond) { + case DCOND_EQ: *r = AT, *s = rs, b = F1_BEQ; break; + case DCOND_NE: *r = AT, *s = rs, b = F1_BNE; break; + case DCOND_LO: *r = rs, *s = AT, b = F1_BLTU; break; // s < imm, u + case DCOND_HS: *r = rs, *s = AT, b = F1_BGEU; break; // s >= imm, u + case DCOND_LS: *r = AT, *s = rs, b = F1_BGEU; break; // s <= imm, u + case DCOND_HI: *r = AT, *s = rs, b = F1_BLTU; break; // s > imm, u + case DCOND_LT: *r = rs, *s = AT, b = F1_BLT; break; // s < imm + case DCOND_GE: *r = rs, *s = AT, b = F1_BGE; break; // s >= imm + case DCOND_LE: *r = AT, *s = rs, b = F1_BGE; break; // s <= imm + case DCOND_GT: *r = AT, *s = rs, b = F1_BLT; break; // s > imm + } + return b; +} + +static int emith_cond_check(int cond, int *r, int *s) +{ + int b = -1; + + *s = *r = Z0; + if (emith_cmp_rs >= 0) { + if (emith_cmp_rt != -1) + b = emith_cmpr_check(emith_cmp_rs,emith_cmp_rt, cond,r,s); + else b = emith_cmpi_check(emith_cmp_rs,emith_cmp_imm,cond,r,s); + } + + // shortcut for V known to be 0 + if (b < 0 && emith_flg_noV) switch (cond) { + case DCOND_VS: *r = Z0; b = F1_BNE; break; // never + case DCOND_VC: *r = Z0; b = F1_BEQ; break; // 
always + case DCOND_LT: *r = FNZ, b = F1_BLT; break; // N + case DCOND_GE: *r = FNZ, b = F1_BGE; break; // !N + case DCOND_LE: *r = Z0, *s = FNZ, b = F1_BGE; break; // N || Z + case DCOND_GT: *r = Z0, *s = FNZ, b = F1_BLT; break; // !N && !Z + } + + // the full monty if no shortcut + if (b < 0) switch (cond) { + // conditions using NZ + case DCOND_EQ: *r = FNZ; b = F1_BEQ; break; // Z + case DCOND_NE: *r = FNZ; b = F1_BNE; break; // !Z + case DCOND_MI: *r = FNZ; b = F1_BLT; break; // N + case DCOND_PL: *r = FNZ; b = F1_BGE; break; // !N + // conditions using C + case DCOND_LO: *r = FC; b = F1_BNE; break; // C + case DCOND_HS: *r = FC; b = F1_BEQ; break; // !C + // conditions using CZ + case DCOND_LS: // C || Z + case DCOND_HI: // !C && !Z + EMIT(R5_ADD_IMM(AT, FC, -1)); // !C && !Z + EMIT(R5_AND_REG(AT, FNZ, AT)); + *r = AT, b = (cond == DCOND_HI ? F1_BNE : F1_BEQ); + break; + + // conditions using V + case DCOND_VS: // V + case DCOND_VC: // !V + EMIT(R5_XOR_REG(AT, FV, FNZ)); // V = Nt^Ns^Nd^C + EMIT(R5_LSRW_IMM(AT, AT, 31)); + EMIT(R5_XOR_REG(AT, AT, FC)); + *r = AT, b = (cond == DCOND_VS ? F1_BNE : F1_BEQ); + break; + // conditions using VNZ + case DCOND_LT: // N^V + case DCOND_GE: // !(N^V) + EMIT(R5_LSRW_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(R5_XOR_REG(AT, FC, AT)); + *r = AT, b = (cond == DCOND_LT ? F1_BNE : F1_BEQ); + break; + case DCOND_LE: // (N^V) || Z + case DCOND_GT: // !(N^V) && !Z + EMIT(R5_LSRW_IMM(AT, FV, 31)); // Nd^V = Nt^Ns^C + EMIT(R5_XOR_REG(AT, FC, AT)); + EMIT(R5_ADD_IMM(AT, AT, -1)); // !(Nd^V) && !Z + EMIT(R5_AND_REG(AT, FNZ, AT)); + *r = AT, b = (cond == DCOND_GT ? 
F1_BNE : F1_BEQ); + break; + } + return b; +} + +// NB: R5 unconditional jumps have only +/- 1MB range, hence use reg jumps +#define emith_jump(target) do { \ + uintptr_t target_ = (uintptr_t)(target) - (uintptr_t)tcache_ptr; \ + EMIT(R5_MOVA_IMM(AT, target_ + _CB(target_,1,11,12))); \ + EMIT(R5_JR(AT, target_)); \ +} while (0) +#define emith_jump_patchable(target) \ + emith_jump(target) + +// NB: R5 conditional branches have only +/- 4KB range +#define emith_jump_cond(cond, target) do { \ + int r_, s_, mcond_ = emith_cond_check(cond, &r_, &s_); \ + u32 disp_ = (u8 *)target - (u8 *)tcache_ptr; \ + EMIT(R5_BCOND(mcond_,r_,s_,disp_ & 0x00001fff)); \ +} while (0) +#define emith_jump_cond_patchable(cond, target) \ + emith_jump_cond(cond, target) + +#define emith_jump_cond_inrange(target) \ + ((u8 *)target - (u8 *)tcache_ptr < 0x1000 && \ + (u8 *)target - (u8 *)tcache_ptr >= -0x1000+0x10) // mind cond_check + +// NB: returns position of patch for cache maintenance +#define emith_jump_patch(ptr, target, pos) do { \ + u32 *ptr_ = (u32 *)ptr; /* must skip condition check code */ \ + while ((*ptr_&0x77) != OP_BCOND && (*ptr_&0x77) != OP_AUIPC) ptr_ ++; \ + if ((*ptr_&0x77) == OP_BCOND) { \ + u32 *p_ = ptr_, disp_ = (u8 *)target - (u8 *)ptr_; \ + u32 f1_ = _CB(*ptr_,3,12,0); \ + u32 r_ = _CB(*ptr_,5,15,0), s_ = _CB(*ptr_,5,20,0); \ + EMIT_PTR(p_, R5_BCOND(f1_, r_, s_, disp_ & 0x00001fff)); \ + } else { \ + u32 *p_ = ptr_; \ + uintptr_t target_ = (uintptr_t)(target) - (uintptr_t)ptr_; \ + EMIT_PTR(p_, R5_MOVA_IMM(AT, target_ + _CB(target_,1,11,12))); \ + EMIT_PTR(p_, R5_JR(AT, target_)); \ + } \ + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_); \ +} while (0) + +#define emith_jump_patch_inrange(ptr, target) \ + ((u8 *)target - (u8 *)ptr < 0x1000 && \ + (u8 *)target - (u8 *)ptr >= -0x1000+0x10) // mind cond_check +#define emith_jump_patch_size() 8 + +#define emith_jump_at(ptr, target) do { \ + u32 *ptr_ = (u32 *)ptr; \ + uintptr_t target_ = (uintptr_t)(target) - 
(uintptr_t)ptr_; \ + EMIT_PTR(ptr_, R5_MOVA_IMM(AT, target_ + _CB(target_,1,11,12))); \ + EMIT_PTR(ptr_, R5_JR(AT, target_)); \ +} while (0) +#define emith_jump_at_size() 8 + +#define emith_jump_reg(r) \ + EMIT(R5_JR(r, 0)) +#define emith_jump_reg_c(cond, r) \ + emith_jump_reg(r) + +#define emith_jump_ctx(offs) do { \ + emith_ctx_read_ptr(AT, offs); \ + emith_jump_reg(AT); \ +} while (0) +#define emith_jump_ctx_c(cond, offs) \ + emith_jump_ctx(offs) + +#define emith_call(target) do { \ + uintptr_t target_ = (uintptr_t)(target) - (uintptr_t)tcache_ptr; \ + EMIT(R5_MOVA_IMM(AT, target_ + _CB(target_,1,11,12))); \ + EMIT(R5_JALR(LR, AT, target_)); \ +} while (0) +#define emith_call_cond(cond, target) \ + emith_call(target) + +#define emith_call_reg(r) \ + EMIT(R5_JALR(LR, r, 0)) + +#define emith_abicall_ctx(offs) do { \ + emith_ctx_read_ptr(AT, offs); \ + emith_call_reg(AT); \ +} while (0) + +#define emith_abijump_reg(r) \ + emith_jump_reg(r) +#define emith_abijump_reg_c(cond, r) \ + emith_abijump_reg(r) +#define emith_abicall(target) \ + emith_call(target) +#define emith_abicall_cond(cond, target) \ + emith_abicall(target) +#define emith_abicall_reg(r) \ + emith_call_reg(r) + +#define emith_call_cleanup() /**/ + +#define emith_ret() \ + EMIT(R5_JR(LR, 0)) +#define emith_ret_c(cond) \ + emith_ret() + +#define emith_ret_to_ctx(offs) \ + emith_ctx_write_ptr(LR, offs) + +#define emith_add_r_ret(r) \ + emith_add_r_r_ptr(r, LR) + +#define emith_push_ret(r) do { \ + emith_add_r_r_ptr_imm(SP, SP, -16); /* ABI requires 16 byte aligment */\ + emith_write_r_r_offs_ptr(LR, SP, 8); \ + if ((r) > 0) emith_write_r_r_offs(r, SP, 0); \ +} while (0) + +#define emith_pop_and_ret(r) do { \ + if ((r) > 0) emith_read_r_r_offs(r, SP, 0); \ + emith_read_r_r_offs_ptr(LR, SP, 8); \ + emith_add_r_r_ptr_imm(SP, SP, 16); \ + emith_ret(); \ +} while (0) + + +// emitter ABI stuff +#define emith_insn_ptr() ((u8 *)tcache_ptr) +#define emith_flush() /**/ +#define host_instructions_updated(base, end, 
force) __builtin___clear_cache(base, end) +#define emith_update_cache() /**/ +#define emith_rw_offs_max() 0x7ff +#define emith_uext_ptr(r) /**/ + +// SH2 drc specific +#define emith_sh2_drc_entry() do { \ + int _c, _z = PTR_SIZE; u32 _m = 0x0ffc0202; /* x1,x9,x18-x27 */ \ + _c = count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ + int _s = count_bits(_m) * _z, _o = _s; \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, -_s); \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) \ + { _o -= _z; if (_c) emith_write_r_r_offs_ptr(_c, SP, _o); } \ +} while (0) +#define emith_sh2_drc_exit() do { \ + int _c, _z = PTR_SIZE; u32 _m = 0x0ffc0202; \ + _c = count_bits(_m)&3; _m |= (1<<((4-_c)&3))-1; /* ABI align */ \ + int _s = count_bits(_m) * _z, _o = 0; \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) \ + { if (_c) emith_read_r_r_offs_ptr(_c, SP, _o); _o += _z; } \ + if (_s) emith_add_r_r_ptr_imm(SP, SP, _s); \ + emith_ret(); \ +} while (0) + +// NB: assumes a is in arg0, tab, func and mask are temp +#define emith_sh2_rcall(a, tab, func, mask) do { \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + emith_add_r_r_r_lsl_ptr(tab, tab, mask, PTR_SCALE+1); \ + emith_read_r_r_offs_ptr(func, tab, 0); \ + emith_read_r_r_offs(mask, tab, PTR_SIZE); \ + emith_addf_r_r_r_ptr(func, func, func); \ +} while (0) + +// NB: assumes a, val are in arg0 and arg1, tab and func are temp +#define emith_sh2_wcall(a, val, tab, func) do { \ + emith_lsr(func, a, SH2_WRITE_SHIFT); \ + emith_lsl(func, func, PTR_SCALE); \ + emith_read_r_r_r_ptr(func, tab, func); \ + emith_move_r_r_ptr(12, CONTEXT_REG); /* arg2 */ \ + emith_abijump_reg(func); \ +} while (0) + +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + /* if (sr < 0) return */ \ + emith_cmp_r_imm(sr, 0); \ + EMITH_JMP_START(DCOND_LE); \ 
+ /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ + emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul(t1, t2, t1); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +/* + * T = !carry(Rn = (Rn << 1) | T) + * if Q + * C = carry(Rn += Rm) + * else + * C = carry(Rn -= Rm) + * T ^= C + */ +#define emith_sh2_div1_step(rn, rm, sr) do { \ + int t_ = rcache_get_tmp(); \ + emith_and_r_r_imm(AT, sr, T); \ + emith_lsr(FC, rn, 31); /*Rn = (Rn<<1)+T*/ \ + emith_lsl(t_, rn, 1); \ + emith_or_r_r(t_, AT); \ + emith_or_r_imm(sr, T); /* T = !carry */ \ + emith_eor_r_r(sr, FC); \ + emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ + EMITH_JMP3_START(DCOND_EQ); \ + emith_add_r_r_r(rn, t_, rm); \ + EMIT(R5_SLTU_REG(FC, rn, t_)); \ + EMITH_JMP3_MID(DCOND_EQ); \ + emith_sub_r_r_r(rn, t_, rm); \ + EMIT(R5_SLTU_REG(FC, t_, rn)); \ + EMITH_JMP3_END(); \ + emith_eor_r_r(sr, FC); /* T ^= carry */ \ + rcache_free_tmp(t_); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. 
sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_add_r_r_r_lsr(rn, rn, mh, 31); /* sum = (MACH>>31)+(MACH>>15) */ \ + emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. this is 0 if no overflow */ \ + emith_lsr(rn, ml, 31); \ + emith_add_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + emith_teq_r_r(rn, Z0); /* (need only N and Z flags) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? 
*/ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -ovrfl */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +ovrfl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +#define emith_write_sr(sr, srcr) do { \ + emith_lsr(sr, sr , 10); emith_lsl(sr, sr, 10); \ + emith_lsl(AT, srcr, 22); emith_lsr(AT, AT, 22); \ + emith_or_r_r(sr, AT); \ +} while (0) + +#define emith_carry_to_t(sr, is_sub) do { \ + emith_and_r_imm(sr, 0xfffffffe); \ + emith_or_r_r(sr, FC); \ +} while (0) + +#define emith_t_to_carry(sr, is_sub) do { \ + emith_and_r_r_imm(FC, sr, 1); \ +} while (0) + +#define emith_tpop_carry(sr, is_sub) do { \ + emith_and_r_r_imm(FC, sr, 1); \ + emith_eor_r_r(sr, FC); \ +} while (0) + +#define emith_tpush_carry(sr, is_sub) \ + emith_or_r_r(sr, FC) + +#ifdef T +// T bit handling +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +static void emith_clr_t_cond(int sr) +{ + emith_bic_r_imm(sr, T); +} + +static void emith_set_t_cond(int sr, int cond) +{ + int b, r, s; + u8 *ptr; + u32 val = 0, inv = 0; + + // try to avoid jumping around if possible + b = emith_cond_check(cond, &r, &s); + if (r == s) { + if (b == F1_BEQ || b == F1_BGE || b == F1_BGEU) + emith_or_r_imm(sr, T); + return; + } else if (r == FC) + val++, inv = (b == F1_BEQ); + + if (!val) switch (b) { + case F1_BEQ: if (s == Z0) { EMIT(R5_SLTU_IMM(AT,r ,1)); r=AT; val++; break; } + if (r == Z0) { EMIT(R5_SLTU_IMM(AT,s ,1)); r=AT; val++; break; } + EMIT(R5_XOR_REG(AT, r, s)); + EMIT(R5_SLTU_IMM(AT,AT, 1)); r=AT; val++; break; + case F1_BNE: if (s == Z0) { EMIT(R5_SLTU_REG(AT,Z0,r)); r=AT; val++; break; } + if (r == Z0) { EMIT(R5_SLTU_REG(AT,Z0,s)); r=AT; val++; break; } + EMIT(R5_XOR_REG(AT, r, s)); + EMIT(R5_SLTU_REG(AT,Z0,AT)); r=AT; val++; break; + case F1_BLTU: EMIT(R5_SLTU_REG(AT, r, s)); r=AT; val++; break; + case F1_BGEU: 
EMIT(R5_SLTU_REG(AT, r, s)); r=AT; val++; inv++; break; + case F1_BLT: EMIT(R5_SLT_REG(AT, r, s)); r=AT; val++; break; + case F1_BGE: EMIT(R5_SLT_REG(AT, r, s)); r=AT; val++; inv++; break; + } + if (val) { + emith_or_r_r(sr, r); + if (inv) + emith_eor_r_imm(sr, T); + return; + } + + // can't obtain result directly, use presumably slower jump !cond + or sr,T + b = emith_invert_branch(b); + ptr = tcache_ptr; + EMIT(R5_BCOND(b, r, s, 0)); + emith_or_r_imm(sr, T); + val = (u8 *)tcache_ptr - (u8 *)(ptr); + EMIT_PTR(ptr, R5_BCOND(b, r, s, val & 0x00001fff)); +} + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? DCOND_NE: DCOND_EQ; +} +#endif diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 754c27fe..6d2598ea 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1,6 +1,7 @@ /* * Basic macros to emit x86 instructions and some utils * Copyright (C) 2008,2009,2010 notaz + * Copyright (C) 2019-2024 irixxxx * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
@@ -13,9 +14,11 @@ */ #include -enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; +enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common + xR8, xR9, xR10, xR11, xR12, xR13, xR14, xR15 }; // x86-64 only -#define CONTEXT_REG xBP +#define CONTEXT_REG xBP +#define RET_REG xAX #define ICOND_JO 0x00 #define ICOND_JNO 0x01 @@ -50,30 +53,52 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define DCOND_VS ICOND_JO // oVerflow Set #define DCOND_VC ICOND_JNO // oVerflow Clear +#define DCOND_CS ICOND_JB // carry set +#define DCOND_CC ICOND_JAE // carry clear + #define EMIT_PTR(ptr, val, type) \ *(type *)(ptr) = val -#define EMIT(val, type) { \ +#define EMIT(val, type) do { \ EMIT_PTR(tcache_ptr, val, type); \ tcache_ptr += sizeof(type); \ -} +} while (0) -#define EMIT_OP(op) { \ +#define EMIT_OP(op) do { \ COUNT_OP; \ - EMIT(op, u8); \ -} + if ((op) > 0xff) EMIT((op) >> 8, u8); \ + EMIT((u8)(op), u8); \ +} while (0) -#define EMIT_MODRM(mod,r,rm) \ - EMIT(((mod)<<6) | ((r)<<3) | (rm), u8) +#define EMIT_MODRM(mod, r, rm) do { \ + assert((mod) < 4u); \ + assert((r) < 8u); \ + assert((rm) < 8u); \ + EMIT(((mod)<<6) | ((r)<<3) | (rm), u8); \ +} while (0) -#define EMIT_SIB(scale,index,base) \ - EMIT(((scale)<<6) | ((index)<<3) | (base), u8) +#define EMIT_SIB(scale, index, base) do { \ + assert((scale) < 4u); \ + assert((index) < 8u); \ + assert((base) < 8u); \ + EMIT(((scale)<<6) | ((index)<<3) | (base), u8); \ +} while (0) + +#define EMIT_SIB64(scale, index, base) \ + EMIT_SIB(scale, (index) & ~8u, (base) & ~8u) + +#define EMIT_REX(w,r,x,b) \ + EMIT(0x40 | ((w)<<3) | ((r)<<2) | ((x)<<1) | (b), u8) #define EMIT_OP_MODRM(op,mod,r,rm) do { \ EMIT_OP(op); \ - EMIT_MODRM(mod, r, rm); \ + EMIT_MODRM(mod, (r), rm); \ } while (0) +// 64bit friendly, rm when everything is converted +#define EMIT_OP_MODRM64(op, mod, r, rm) \ + EMIT_OP_MODRM(op, mod, (r) & ~8u, (rm) & ~8u) + #define JMP8_POS(ptr) \ ptr = tcache_ptr; \ tcache_ptr += 2 @@ -87,65 +112,102 @@ enum { 
xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_PTR(ptr + 1, (tcache_ptr - (ptr+2)), u8) // _r_r -#define emith_move_r_r(dst, src) \ - EMIT_OP_MODRM(0x8b, 3, dst, src) +#define emith_move_r_r(dst, src) do {\ + EMIT_REX_IF(0, dst, src); \ + EMIT_OP_MODRM64(0x8b, 3, dst, src); \ +} while (0) -#define emith_add_r_r(d, s) \ - EMIT_OP_MODRM(0x01, 3, s, d) +#define emith_move_r_r_ptr(dst, src) do { \ + EMIT_REX_IF(1, dst, src); \ + EMIT_OP_MODRM64(0x8b, 3, dst, src); \ +} while (0) -#define emith_sub_r_r(d, s) \ - EMIT_OP_MODRM(0x29, 3, s, d) +#define emith_add_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x01, 3, s, d); \ +} while (0) -#define emith_adc_r_r(d, s) \ - EMIT_OP_MODRM(0x11, 3, s, d) +#define emith_add_r_r_ptr(d, s) do { \ + EMIT_REX_IF(1, s, d); \ + EMIT_OP_MODRM64(0x01, 3, s, d); \ +} while (0) -#define emith_sbc_r_r(d, s) \ - EMIT_OP_MODRM(0x19, 3, s, d) /* SBB */ +#define emith_sub_r_r(d, s) do {\ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x29, 3, s, d); \ +} while (0) -#define emith_or_r_r(d, s) \ - EMIT_OP_MODRM(0x09, 3, s, d) +#define emith_adc_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x11, 3, s, d); \ +} while (0) -#define emith_and_r_r(d, s) \ - EMIT_OP_MODRM(0x21, 3, s, d) +#define emith_sbc_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x19, 3, s, d); /* SBB */ \ +} while (0) -#define emith_eor_r_r(d, s) \ - EMIT_OP_MODRM(0x31, 3, s, d) /* XOR */ +#define emith_or_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x09, 3, s, d); \ +} while (0) -#define emith_tst_r_r(d, s) \ - EMIT_OP_MODRM(0x85, 3, s, d) /* TEST */ +#define emith_and_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x21, 3, s, d); \ +} while (0) -#define emith_cmp_r_r(d, s) \ - EMIT_OP_MODRM(0x39, 3, s, d) +#define emith_eor_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x31, 3, s, d); /* XOR */ \ +} while (0) + +#define emith_tst_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + 
EMIT_OP_MODRM64(0x85, 3, s, d); /* TEST */ \ +} while (0) + +#define emith_tst_r_r_ptr(d, s) do { \ + EMIT_REX_IF(1, s, d); \ + EMIT_OP_MODRM64(0x85, 3, s, d); /* TEST */ \ +} while (0) + +#define emith_cmp_r_r(d, s) do { \ + EMIT_REX_IF(0, s, d); \ + EMIT_OP_MODRM64(0x39, 3, s, d); \ +} while (0) // fake teq - test equivalence - get_flags(d ^ s) -#define emith_teq_r_r(d, s) { \ +#define emith_teq_r_r(d, s) do { \ emith_push(d); \ emith_eor_r_r(d, s); \ emith_pop(d); \ -} +} while (0) -#define emith_mvn_r_r(d, s) { \ +#define emith_mvn_r_r(d, s) do { \ if (d != s) \ emith_move_r_r(d, s); \ - EMIT_OP_MODRM(0xf7, 3, 2, d); /* NOT d */ \ -} + EMIT_REX_IF(0, 0, d); \ + EMIT_OP_MODRM64(0xf7, 3, 2, d); /* NOT d */ \ +} while (0) -#define emith_negc_r_r(d, s) { \ +#define emith_negc_r_r(d, s) do { \ int tmp_ = rcache_get_tmp(); \ emith_move_r_imm(tmp_, 0); \ emith_sbc_r_r(tmp_, s); \ emith_move_r_r(d, tmp_); \ rcache_free_tmp(tmp_); \ -} +} while (0) -#define emith_neg_r_r(d, s) { \ +#define emith_neg_r_r(d, s) do { \ if (d != s) \ emith_move_r_r(d, s); \ - EMIT_OP_MODRM(0xf7, 3, 3, d); /* NEG d */ \ -} + EMIT_REX_IF(0, 0, d); \ + EMIT_OP_MODRM64(0xf7, 3, 3, d); /* NEG d */ \ +} while (0) // _r_r_r -#define emith_add_r_r_r(d, s1, s2) { \ +#define emith_add_r_r_r(d, s1, s2) do { \ if (d == s1) { \ emith_add_r_r(d, s2); \ } else if (d == s2) { \ @@ -154,9 +216,75 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_move_r_r(d, s1); \ emith_add_r_r(d, s2); \ } \ -} +} while (0) -#define emith_eor_r_r_r(d, s1, s2) { \ +#define emith_add_r_r_r_ptr(d, s1, s2) do { \ + if (d == s1) { \ + emith_add_r_r_ptr(d, s2); \ + } else if (d == s2) { \ + emith_add_r_r_ptr(d, s1); \ + } else { \ + emith_move_r_r_ptr(d, s1); \ + emith_add_r_r_ptr(d, s2); \ + } \ +} while (0) + +#define emith_sub_r_r_r(d, s1, s2) do { \ + if (d == s1) { \ + emith_sub_r_r(d, s2); \ + } else if (d == s2) { \ + emith_sub_r_r(d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + emith_sub_r_r(d, s2); \ + } 
\ +} while (0) + +#define emith_adc_r_r_r(d, s1, s2) do { \ + if (d == s1) { \ + emith_adc_r_r(d, s2); \ + } else if (d == s2) { \ + emith_adc_r_r(d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + emith_adc_r_r(d, s2); \ + } \ +} while (0) + +#define emith_sbc_r_r_r(d, s1, s2) do { \ + if (d == s1) { \ + emith_sbc_r_r(d, s2); \ + } else if (d == s2) { \ + emith_sbc_r_r(d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + emith_sbc_r_r(d, s2); \ + } \ +} while (0) + +#define emith_and_r_r_r(d, s1, s2) do { \ + if (d == s1) { \ + emith_and_r_r(d, s2); \ + } else if (d == s2) { \ + emith_and_r_r(d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + emith_and_r_r(d, s2); \ + } \ +} while (0) + +#define emith_or_r_r_r(d, s1, s2) do { \ + if (d == s1) { \ + emith_or_r_r(d, s2); \ + } else if (d == s2) { \ + emith_or_r_r(d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + emith_or_r_r(d, s2); \ + } \ +} while (0) + +#define emith_eor_r_r_r(d, s1, s2) do { \ if (d == s1) { \ emith_eor_r_r(d, s2); \ } else if (d == s2) { \ @@ -165,35 +293,125 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_move_r_r(d, s1); \ emith_eor_r_r(d, s2); \ } \ -} +} while (0) + +// _r_r_r_shift +#define emith_add_r_r_r_lsl(d, s1, s2, lslimm) do { \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_add_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_add_r_r_r(d, s1, s2); \ +} while (0) + +#define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) do { \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_add_r_r_r_ptr(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_add_r_r_r_ptr(d, s1, s2); \ +} while (0) + +#define emith_add_r_r_r_lsr(d, s1, s2, lsrimm) do { \ + if (lsrimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s2, lsrimm); \ + emith_add_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_add_r_r_r(d, s1, s2); \ +} while (0) + +#define emith_sub_r_r_r_lsl(d, 
s1, s2, lslimm) do { \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_sub_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_sub_r_r_r(d, s1, s2); \ +} while (0) + +#define emith_or_r_r_r_lsl(d, s1, s2, lslimm) do { \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_or_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_or_r_r_r(d, s1, s2); \ +} while (0) +#define emith_or_r_r_r_lsr(d, s1, s2, lsrimm) do { \ + if (lsrimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s2, lsrimm); \ + emith_or_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_or_r_r_r(d, s1, s2); \ +} while (0) + +#define emith_eor_r_r_r_lsr(d, s1, s2, lsrimm) do { \ + if (lsrimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s2, lsrimm); \ + emith_eor_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_eor_r_r_r(d, s1, s2); \ +} while (0) // _r_r_shift -#define emith_or_r_r_lsl(d, s, lslimm) { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s, lslimm); \ - emith_or_r_r(d, tmp_); \ - rcache_free_tmp(tmp_); \ -} +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) +#define emith_or_r_r_lsr(d, s, lsrimm) \ + emith_or_r_r_r_lsr(d, d, s, lsrimm) -// d != s -#define emith_eor_r_r_lsr(d, s, lsrimm) { \ - emith_push(s); \ - emith_lsr(s, s, lsrimm); \ - emith_eor_r_r(d, s); \ - emith_pop(s); \ -} +#define emith_eor_r_r_lsl(d, s, lslimm) do { \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s, lslimm); \ + emith_eor_r_r(d, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_eor_r_r(d, s); \ +} while (0) +#define emith_eor_r_r_lsr(d, s, lsrimm) do { \ + if (lsrimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s, lsrimm); \ + emith_eor_r_r(d, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_eor_r_r(d, s); \ +} while (0) // _r_imm -#define emith_move_r_imm(r, imm) { \ - EMIT_OP(0xb8 + (r)); \ 
+#define emith_move_r_imm(r, imm) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP(0xb8 + ((r)&7)); \ EMIT(imm, u32); \ -} +} while (0) -#define emith_move_r_imm_s8(r, imm) \ - emith_move_r_imm(r, (u32)(signed int)(signed char)(imm)) +#define emith_move_r_ptr_imm(r, imm) do { \ + if ((uintptr_t)(imm) <= UINT32_MAX) \ + emith_move_r_imm(r, (uintptr_t)(imm)); \ + else { \ + EMIT_REX_IF(1, 0, r); \ + EMIT_OP(0xb8 + ((r)&7)); \ + EMIT((uintptr_t)(imm), uint64_t); \ + } \ +} while (0) + +#define emith_move_r_imm_s8_patchable(r, imm) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP(0xb8 + ((r)&7)); \ + EMIT((s8)(imm), u32); \ +} while (0) +#define emith_move_r_imm_s8_patch(ptr, imm) do { \ + u8 *ptr_ = ptr; \ + while ((*ptr_ & 0xf8) != 0xb8) ptr_++; \ + EMIT_PTR(ptr_ + 1, (s8)(imm), u32); \ +} while (0) #define emith_arith_r_imm(op, r, imm) do { \ - EMIT_OP_MODRM(0x81, 3, op, r); \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0x81, 3, op, r); \ EMIT(imm, u32); \ } while (0) @@ -221,8 +439,15 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_cmp_r_imm(r, imm) \ emith_arith_r_imm(7, r, imm) +#define emith_eor_r_imm_ptr(r, imm) do { \ + EMIT_REX_IF(1, 0, r); \ + EMIT_OP_MODRM64(0x81, 3, 6, r); \ + EMIT(imm, u32); \ +} while (0) + #define emith_tst_r_imm(r, imm) do { \ - EMIT_OP_MODRM(0xf7, 3, 0, r); \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0xf7, 3, 0, r); \ EMIT(imm, u32); \ } while (0) @@ -231,34 +456,52 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_arith_r_imm(4, r, ~(imm)) // fake conditionals (using SJMP instead) -#define emith_move_r_imm_c(cond, r, imm) { \ - (void)(cond); \ - emith_move_r_imm(r, imm); \ -} - -#define emith_add_r_imm_c(cond, r, imm) { \ - (void)(cond); \ - emith_add_r_imm(r, imm); \ -} - -#define emith_sub_r_imm_c(cond, r, imm) { \ - (void)(cond); \ - emith_sub_r_imm(r, imm); \ -} - +#define emith_move_r_imm_c(cond, r, imm) \ + emith_move_r_imm(r, imm) +#define emith_add_r_imm_c(cond, r, imm) \ + emith_add_r_imm(r, imm) 
+#define emith_sub_r_imm_c(cond, r, imm) \ + emith_sub_r_imm(r, imm) #define emith_or_r_imm_c(cond, r, imm) \ emith_or_r_imm(r, imm) #define emith_eor_r_imm_c(cond, r, imm) \ emith_eor_r_imm(r, imm) +#define emith_eor_r_imm_ptr_c(cond, r, imm) \ + emith_eor_r_imm_ptr(r, imm) #define emith_bic_r_imm_c(cond, r, imm) \ emith_bic_r_imm(r, imm) +#define emith_tst_r_imm_c(cond, r, imm) \ + emith_tst_r_imm(r, imm) +#define emith_move_r_r_ptr_c(cond, d, s) \ + emith_move_r_r_ptr(d, s) #define emith_ror_c(cond, d, s, cnt) \ emith_ror(d, s, cnt) +#define emith_and_r_r_c(cond, d, s) \ + emith_and_r_r(d, s) +#define emith_add_r_r_imm_c(cond, d, s, imm) \ + emith_add_r_r_imm(d, s, imm) +#define emith_sub_r_r_imm_c(cond, d, s, imm) \ + emith_sub_r_r_imm(d, s, imm) + +#define emith_read8_r_r_r_c(cond, r, rs, rm) \ + emith_read8_r_r_r(r, rs, rm) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + emith_read8s_r_r_r(r, rs, rm) +#define emith_read16_r_r_r_c(cond, r, rs, rm) \ + emith_read16_r_r_r(r, rs, rm) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + emith_read16s_r_r_r(r, rs, rm) +#define emith_read_r_r_r_c(cond, r, rs, rm) \ + emith_read_r_r_r(r, rs, rm) #define emith_read_r_r_offs_c(cond, r, rs, offs) \ emith_read_r_r_offs(r, rs, offs) +#define emith_read_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_read_r_r_offs_ptr(r, rs, offs) #define emith_write_r_r_offs_c(cond, r, rs, offs) \ emith_write_r_r_offs(r, rs, offs) +#define emith_write_r_r_offs_ptr_c(cond, r, rs, offs) \ + emith_write_r_r_offs_ptr(r, rs, offs) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ emith_read8_r_r_offs(r, rs, offs) #define emith_write8_r_r_offs_c(cond, r, rs, offs) \ @@ -274,26 +517,63 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_ret_c(cond) \ emith_ret() -// _r_r_imm -#define emith_add_r_r_imm(d, s, imm) { \ - if (d != s) \ +// _r_r_imm - use lea +#define emith_add_r_r_imm(d, s, imm) do { \ + if (imm == 0) \ emith_move_r_r(d, s); \ - emith_add_r_imm(d, imm); \ -} + else 
{ \ + EMIT_REX_IF(0, d, s); \ + emith_deref_modrm(0x8d, 2, d, s); \ + EMIT(imm, s32); \ + } \ +} while (0) -#define emith_and_r_r_imm(d, s, imm) { \ +#define emith_add_r_r_ptr_imm(d, s, imm) do { \ + if (imm == 0) \ + emith_move_r_r_ptr(d, s); \ + else { \ + EMIT_REX_IF(1, d, s); \ + emith_deref_modrm(0x8d, 2, d, s); \ + EMIT(imm, s32); \ + } \ +} while (0) + +#define emith_sub_r_r_imm(d, s, imm) do { \ if (d != s) \ emith_move_r_r(d, s); \ - emith_and_r_imm(d, imm); \ -} + if ((s32)(imm) != 0) \ + emith_sub_r_imm(d, imm); \ +} while (0) + +#define emith_and_r_r_imm(d, s, imm) do { \ + if (d != s) \ + emith_move_r_r(d, s); \ + if ((s32)(imm) != -1) \ + emith_and_r_imm(d, imm); \ +} while (0) + +#define emith_or_r_r_imm(d, s, imm) do { \ + if (d != s) \ + emith_move_r_r(d, s); \ + if ((s32)(imm) != 0) \ + emith_or_r_imm(d, imm); \ +} while (0) + +#define emith_eor_r_r_imm(d, s, imm) do { \ + if (d != s) \ + emith_move_r_r(d, s); \ + if ((s32)(imm) != 0) \ + emith_eor_r_imm(d, imm); \ +} while (0) // shift -#define emith_shift(op, d, s, cnt) { \ +#define emith_shift(op, d, s, cnt) do { \ if (d != s) \ emith_move_r_r(d, s); \ - EMIT_OP_MODRM(0xc1, 3, op, d); \ + EMIT_REX_IF(0, 0, d); \ + EMIT_OP_MODRM64(0xc1, 3, op, d); \ EMIT(cnt, u8); \ -} +} while (0) #define emith_lsl(d, s, cnt) \ emith_shift(4, d, s, cnt) @@ -310,56 +590,69 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_ror(d, s, cnt) \ emith_shift(1, d, s, cnt) -#define emith_rolc(r) \ - EMIT_OP_MODRM(0xd1, 3, 2, r) +#define emith_rolc(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0xd1, 3, 2, r); \ +} while (0) -#define emith_rorc(r) \ - EMIT_OP_MODRM(0xd1, 3, 3, r) +#define emith_rorc(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0xd1, 3, 3, r); \ +} while (0) // misc -#define emith_push(r) \ - EMIT_OP(0x50 + (r)) +#define emith_push(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP(0x50 + ((r)&7)); \ +} while (0) -#define emith_push_imm(imm) { \ +#define emith_push_imm(imm) 
do { \ EMIT_OP(0x68); \ EMIT(imm, u32); \ -} +} while (0) -#define emith_pop(r) \ - EMIT_OP(0x58 + (r)) +#define emith_pop(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP(0x58 + ((r)&7)); \ +} while (0) -#define emith_neg_r(r) \ - EMIT_OP_MODRM(0xf7, 3, 3, r) +#define emith_neg_r(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0xf7, 3, 3, r); \ +} while (0) -#define emith_clear_msb(d, s, count) { \ +#define emith_clear_msb(d, s, count) do { \ u32 t = (u32)-1; \ t >>= count; \ if (d != s) \ emith_move_r_r(d, s); \ - emith_and_r_imm(d, t); \ -} + if (count) emith_and_r_imm(d, t); \ +} while (0) -#define emith_clear_msb_c(cond, d, s, count) { \ +#define emith_clear_msb_c(cond, d, s, count) do { \ (void)(cond); \ emith_clear_msb(d, s, count); \ -} +} while (0) -#define emith_sext(d, s, bits) { \ +#define emith_sext(d, s, bits) do { \ emith_lsl(d, s, 32 - (bits)); \ emith_asr(d, d, 32 - (bits)); \ -} +} while (0) -#define emith_setc(r) { \ - EMIT_OP(0x0f); \ - EMIT_OP_MODRM(0x92, 3, 0, r); /* SETC r */ \ -} +#define emith_uext_ptr(r) /**/ + +#define emith_setc(r) do { \ + assert(is_abcdx(r)); \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0x0f92, 3, 0, r); /* SETC r */ \ +} while (0) // XXX: stupid mess -#define emith_mul_(op, dlo, dhi, s1, s2) { \ +#define emith_mul_(op, dlo, dhi, s1, s2) do { \ int rmr; \ - if (dlo != xAX && dhi != xAX) \ + if (dlo != xAX && dhi != xAX && rcache_is_hreg_used(xAX)) \ emith_push(xAX); \ - if (dlo != xDX && dhi != xDX) \ + if (dlo != xDX && dhi != xDX && rcache_is_hreg_used(xDX)) \ emith_push(xDX); \ if ((s1) == xAX) \ rmr = s2; \ @@ -369,19 +662,19 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_move_r_r(xAX, s1); \ rmr = s2; \ } \ - EMIT_OP_MODRM(0xf7, 3, op, rmr); /* xMUL rmr */ \ - /* XXX: using push/pop for the case of edx->eax; eax->edx */ \ - if (dhi != xDX && dhi != -1) \ - emith_push(xDX); \ - if (dlo != xAX) \ - emith_move_r_r(dlo, xAX); \ - if (dhi != xDX && dhi != -1) \ - emith_pop(dhi); \ - if (dlo != xDX && 
dhi != xDX) \ + EMIT_REX_IF(0, 0, rmr); \ + EMIT_OP_MODRM64(0xf7, 3, op, rmr); /* xMUL rmr */ \ + if (dlo != xAX) { \ + EMIT_REX_IF(0, 0, dlo); \ + EMIT_OP(0x90 + ((dlo)&7)); /* XCHG eax, dlo */ \ + } \ + if (dhi != xDX && dhi != -1 && !(dhi == xAX && dlo == xDX)) \ + emith_move_r_r(dhi, (dlo == xDX ? xAX : xDX)); \ + if (dlo != xDX && dhi != xDX && rcache_is_hreg_used(xDX)) \ emith_pop(xDX); \ - if (dlo != xAX && dhi != xAX) \ + if (dlo != xAX && dhi != xAX && rcache_is_hreg_used(xAX)) \ emith_pop(xAX); \ -} +} while (0) #define emith_mul_u64(dlo, dhi, s1, s2) \ emith_mul_(4, dlo, dhi, s1, s2) /* MUL */ @@ -389,23 +682,35 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_mul_s64(dlo, dhi, s1, s2) \ emith_mul_(5, dlo, dhi, s1, s2) /* IMUL */ -#define emith_mul(d, s1, s2) \ - emith_mul_(4, d, -1, s1, s2) +#define emith_mul(d, s1, s2) do { \ + if (d == s1) { \ + EMIT_REX_IF(0, d, s2); \ + EMIT_OP_MODRM64(0x0faf, 3, d, s2); \ + } else if (d == s2) { \ + EMIT_REX_IF(0, d, s1); \ + EMIT_OP_MODRM64(0x0faf, 3, d, s1); \ + } else { \ + emith_move_r_r(d, s1); \ + EMIT_REX_IF(0, d, s2); \ + EMIT_OP_MODRM64(0x0faf, 3, d, s2); \ + } \ +} while (0) // (dlo,dhi) += signed(s1) * signed(s2) -#define emith_mula_s64(dlo, dhi, s1, s2) { \ +#define emith_mula_s64(dlo, dhi, s1, s2) do { \ emith_push(dhi); \ emith_push(dlo); \ emith_mul_(5, dlo, dhi, s1, s2); \ - EMIT_OP_MODRM(0x03, 0, dlo, 4); \ - EMIT_SIB(0, 4, 4); /* add dlo, [esp] */ \ - EMIT_OP_MODRM(0x13, 1, dhi, 4); \ - EMIT_SIB(0, 4, 4); \ - EMIT(4, u8); /* adc dhi, [esp+4] */ \ - emith_add_r_imm(xSP, 4*2); \ -} + EMIT_REX_IF(0, dlo, xSP); \ + emith_deref_modrm(0x03, 0, dlo, xSP); /* add dlo, [xsp] */ \ + EMIT_REX_IF(0, dhi, xSP); \ + emith_deref_modrm(0x13, 1, dhi, xSP); /* adc dhi, [xsp+{4,8}] */ \ + EMIT(sizeof(void *), u8); \ + emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *) * 2); \ +} while (0) // "flag" instructions are the same +#define emith_adcf_r_imm emith_adc_r_imm #define emith_subf_r_imm 
emith_sub_r_imm #define emith_addf_r_r emith_add_r_r #define emith_subf_r_r emith_sub_r_r @@ -414,6 +719,14 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_eorf_r_r emith_eor_r_r #define emith_negcf_r_r emith_negc_r_r +#define emith_subf_r_r_imm emith_sub_r_r_imm +#define emith_addf_r_r_r emith_add_r_r_r +#define emith_subf_r_r_r emith_sub_r_r_r +#define emith_adcf_r_r_r emith_adc_r_r_r +#define emith_sbcf_r_r_r emith_sbc_r_r_r +#define emith_eorf_r_r_r emith_eor_r_r_r +#define emith_addf_r_r_r_lsr emith_add_r_r_r_lsr + #define emith_lslf emith_lsl #define emith_lsrf emith_lsr #define emith_asrf emith_asr @@ -422,60 +735,136 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_rolcf emith_rolc #define emith_rorcf emith_rorc +#define emith_deref_modrm(op, m, r, rs) do { \ + if (((rs) & 7) == 5 && m == 0) { /* xBP,xR13 not in mod 0, use mod 1 */\ + EMIT_OP_MODRM64(op, 1, r, rs); \ + EMIT(0, u8); \ + } else if (((rs) & 7) == 4) { /* xSP,xR12 must use SIB */ \ + EMIT_OP_MODRM64(op, m, r, 4); \ + EMIT_SIB64(0, 4, rs); \ + } else \ + EMIT_OP_MODRM64(op, m, r, rs); \ +} while (0) + #define emith_deref_op(op, r, rs, offs) do { \ /* mov r <-> [ebp+#offs] */ \ - if ((offs) >= 0x80) { \ - EMIT_OP_MODRM(op, 2, r, rs); \ + if ((offs) == 0) { \ + emith_deref_modrm(op, 0, r, rs); \ + } else if ((s32)(offs) < -0x80 || (s32)(offs) >= 0x80) { \ + emith_deref_modrm(op, 2, r, rs); \ EMIT(offs, u32); \ } else { \ - EMIT_OP_MODRM(op, 1, r, rs); \ - EMIT(offs, u8); \ + emith_deref_modrm(op, 1, r, rs); \ + EMIT((u8)offs, u8); \ } \ } while (0) -#define is_abcdx(r) (xAX <= (r) && (r) <= xDX) +#define is_abcdx(r) !((r) & ~0x3) -#define emith_read_r_r_offs(r, rs, offs) \ - emith_deref_op(0x8b, r, rs, offs) +#define emith_read_r_r_offs(r, rs, offs) do { \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x8b, r, rs, offs); \ +} while (0) +#define emith_read_r_r_offs_ptr(r, rs, offs) do { \ + EMIT_REX_IF(1, r, rs); \ + emith_deref_op(0x8b, r, rs, offs); \ +} 
while (0) -#define emith_write_r_r_offs(r, rs, offs) \ - emith_deref_op(0x89, r, rs, offs) +#define emith_write_r_r_offs(r, rs, offs) do { \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x89, r, rs, offs); \ +} while (0) +#define emith_write_r_r_offs_ptr(r, rs, offs) do { \ + EMIT_REX_IF(1, r, rs); \ + emith_deref_op(0x89, r, rs, offs); \ +} while (0) -// note: don't use prefixes on this #define emith_read8_r_r_offs(r, rs, offs) do { \ - int r_ = r; \ - if (!is_abcdx(r)) \ - r_ = rcache_get_tmp(); \ - emith_deref_op(0x8a, r_, rs, offs); \ - if ((r) != r_) { \ - emith_move_r_r(r, r_); \ - rcache_free_tmp(r_); \ - } \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x0fb6, r, rs, offs); \ +} while (0) + +#define emith_read8s_r_r_offs(r, rs, offs) do { \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x0fbe, r, rs, offs); \ } while (0) #define emith_write8_r_r_offs(r, rs, offs) do {\ - int r_ = r; \ - if (!is_abcdx(r)) { \ - r_ = rcache_get_tmp(); \ - emith_move_r_r(r_, r); \ - } \ - emith_deref_op(0x88, r_, rs, offs); \ - if ((r) != r_) \ - rcache_free_tmp(r_); \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x88, r, rs, offs); \ } while (0) -#define emith_read16_r_r_offs(r, rs, offs) { \ - EMIT(0x66, u8); /* operand override */ \ - emith_read_r_r_offs(r, rs, offs); \ -} +#define emith_read16_r_r_offs(r, rs, offs) do { \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x0fb7, r, rs, offs); \ +} while (0) -#define emith_write16_r_r_offs(r, rs, offs) { \ - EMIT(0x66, u8); \ - emith_write_r_r_offs(r, rs, offs); \ -} +#define emith_read16s_r_r_offs(r, rs, offs) do { \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x0fbf, r, rs, offs); \ +} while (0) + +#define emith_write16_r_r_offs(r, rs, offs) do { \ + EMIT(0x66, u8); /* Intel SDM Vol 2a: REX must be closest to opcode */ \ + EMIT_REX_IF(0, r, rs); \ + emith_deref_op(0x89, r, rs, offs); \ +} while (0) + +#define emith_read8_r_r_r(r, rs, rm) do { \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x0fb6, 0, r, 4); \ + EMIT_SIB64(0, rs, 
rm); /* mov r, [rm + rs * 1] */ \ +} while (0) + +#define emith_read8s_r_r_r(r, rs, rm) do { \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x0fbe, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ +} while (0) + +#define emith_read16_r_r_r(r, rs, rm) do { \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x0fb7, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ +} while (0) + +#define emith_read16s_r_r_r(r, rs, rm) do { \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x0fbf, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ +} while (0) + +#define emith_read_r_r_r(r, rs, rm) do { \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x8b, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ +} while (0) +#define emith_read_r_r_r_ptr(r, rs, rm) do { \ + EMIT_XREX_IF(1, r, rm, rs); \ + EMIT_OP_MODRM64(0x8b, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ +} while (0) + +#define emith_write_r_r_r(r, rs, rm) do { \ + EMIT_XREX_IF(0, r, rm, rs); \ + EMIT_OP_MODRM64(0x89, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov [rm + rs * 1], r */ \ +} while (0) +#define emith_write_r_r_r_ptr(r, rs, rm) do { \ + EMIT_XREX_IF(1, r, rm, rs); \ + EMIT_OP_MODRM64(0x89, 0, r, 4); \ + EMIT_SIB64(0, rs, rm); /* mov [rm + rs * 1], r */ \ +} while (0) #define emith_ctx_read(r, offs) \ emith_read_r_r_offs(r, CONTEXT_REG, offs) +#define emith_ctx_read_c(cond, r, offs) \ + emith_ctx_read(r, offs) + +#define emith_ctx_read_ptr(r, offs) do { \ + EMIT_REX_IF(1, r, CONTEXT_REG); \ + emith_deref_op(0x8b, r, CONTEXT_REG, offs); \ +} while (0) #define emith_ctx_write(r, offs) \ emith_write_r_r_offs(r, CONTEXT_REG, offs) @@ -492,75 +881,114 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_ctx_write(r_, offs_); \ } while (0) -// assumes EBX is free -#define emith_ret_to_ctx(offs) { \ - emith_pop(xBX); \ - emith_ctx_write(xBX, offs); \ -} +#define emith_ret_to_ctx(offs) do { \ + int tmp_ = 
rcache_get_tmp(); \ + emith_pop(tmp_); \ + emith_ctx_write(tmp_, offs); \ + rcache_free_tmp(tmp_); \ +} while (0) -#define emith_jump(ptr) { \ - u32 disp = (u32)(ptr) - ((u32)tcache_ptr + 5); \ +#define emith_jump(ptr) do { \ + u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 5); \ EMIT_OP(0xe9); \ EMIT(disp, u32); \ -} +} while (0) #define emith_jump_patchable(target) \ emith_jump(target) -#define emith_jump_cond(cond, ptr) { \ - u32 disp = (u32)(ptr) - ((u32)tcache_ptr + 6); \ - EMIT(0x0f, u8); \ - EMIT_OP(0x80 | (cond)); \ +#define emith_jump_cond(cond, ptr) do { \ + u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \ + EMIT_OP(0x0f80 | (cond)); \ EMIT(disp, u32); \ -} +} while (0) +#define emith_jump_cond_inrange(ptr) !0 #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) -#define emith_jump_patch(ptr, target) do { \ - u32 disp_ = (u32)(target) - ((u32)(ptr) + 4); \ +#define emith_jump_patch(ptr, target, pos) do { \ + u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \ u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 
2 : 1; \ EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr + offs_; \ } while (0) +#define emith_jump_patch_size() 4 +#define emith_jump_patch_inrange(ptr, target) !0 -#define emith_jump_at(ptr, target) { \ - u32 disp_ = (u32)(target) - ((u32)(ptr) + 5); \ +#define emith_jump_at(ptr, target) do { \ + u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 5); \ EMIT_PTR(ptr, 0xe9, u8); \ EMIT_PTR((u8 *)(ptr) + 1, disp_, u32); \ -} +} while (0) +#define emith_jump_at_size() 5 -#define emith_call(ptr) { \ - u32 disp = (u32)(ptr) - ((u32)tcache_ptr + 5); \ +#define emith_call(ptr) do { \ + u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 5); \ EMIT_OP(0xe8); \ EMIT(disp, u32); \ -} +} while (0) #define emith_call_cond(cond, ptr) \ emith_call(ptr) -#define emith_call_reg(r) \ - EMIT_OP_MODRM(0xff, 3, 2, r) +#define emith_call_reg(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0xff, 3, 2, r); \ +} while (0) -#define emith_call_ctx(offs) { \ - EMIT_OP_MODRM(0xff, 2, 2, CONTEXT_REG); \ +#define emith_abicall_ctx(offs) do { \ + EMIT_REX_IF(0, 0, CONTEXT_REG); \ + EMIT_OP_MODRM64(0xff, 2, 2, CONTEXT_REG); \ EMIT(offs, u32); \ -} +} while (0) + +#define emith_call_cleanup() \ + emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // remove return addr #define emith_ret() \ EMIT_OP(0xc3) -#define emith_jump_reg(r) \ - EMIT_OP_MODRM(0xff, 3, 4, r) +#define emith_add_r_ret(r) do { \ + EMIT_REX_IF(1, r, xSP); \ + emith_deref_modrm(0x03, 0, r, xSP); /* add r, [xsp] */ \ +} while (0) -#define emith_jump_ctx(offs) { \ - EMIT_OP_MODRM(0xff, 2, 4, CONTEXT_REG); \ +#define emith_jump_reg(r) do { \ + EMIT_REX_IF(0, 0, r); \ + EMIT_OP_MODRM64(0xff, 3, 4, r); \ +} while (0) + +#define emith_jump_ctx(offs) do { \ + EMIT_REX_IF(0, 0, CONTEXT_REG); \ + EMIT_OP_MODRM64(0xff, 2, 4, CONTEXT_REG); \ EMIT(offs, u32); \ -} +} while (0) -#define emith_push_ret() +#define emith_push_ret(r) do { \ + int r_ = (r >= 0 ? 
r : xSI); \ + emith_push(r_); /* always push to align */ \ + emith_add_r_r_ptr_imm(xSP, xSP, -8*4); /* args shadow space */ \ +} while (0) + +#define emith_pop_and_ret(r) do { \ + int r_ = (r >= 0 ? r : xSI); \ + emith_add_r_r_ptr_imm(xSP, xSP, 8*4); /* args shadow space */ \ + emith_pop(r_); \ + emith_ret(); \ +} while (0) + +#define emith_abijump_reg(r) \ + emith_jump_reg(r) +#define emith_abijump_reg_c(cond, r) \ + emith_abijump_reg(r) +#define emith_abicall(target) \ + emith_call(target) +#define emith_abicall_cond(cond, target) \ + emith_abicall(target) +#define emith_abicall_reg(r) \ + emith_call_reg(r) -#define emith_pop_and_ret() \ - emith_ret() #define EMITH_JMP_START(cond) { \ u8 *cond_ptr; \ @@ -582,7 +1010,7 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; JMP8_EMIT_NC(else_ptr); \ } -// "simple" jump (no more then a few insns) +// "simple" jump (no more than a few insns) // ARM will use conditional instructions here #define EMITH_SJMP_START EMITH_JMP_START #define EMITH_SJMP_END EMITH_JMP_END @@ -591,55 +1019,224 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define EMITH_SJMP3_MID EMITH_JMP3_MID #define EMITH_SJMP3_END EMITH_JMP3_END -#define emith_pass_arg_r(arg, reg) { \ +#define EMITH_SJMP2_START(cond) \ + EMITH_SJMP3_START(cond) +#define EMITH_SJMP2_MID(cond) \ + EMITH_SJMP3_MID(cond) +#define EMITH_SJMP2_END(cond) \ + EMITH_SJMP3_END() + +#define EMITH_HINT_COND(cond) /**/ + +#define emith_pass_arg_r(arg, reg) do { \ int rd = 7; \ host_arg2reg(rd, arg); \ - emith_move_r_r(rd, reg); \ -} + emith_move_r_r_ptr(rd, reg); \ +} while (0) -#define emith_pass_arg_imm(arg, imm) { \ +#define emith_pass_arg_imm(arg, imm) do { \ int rd = 7; \ host_arg2reg(rd, arg); \ - emith_move_r_imm(rd, imm); \ -} + emith_move_r_ptr_imm(rd, imm); \ +} while (0) -#define host_instructions_updated(base, end) +#define host_instructions_updated(base, end, force) (void)(base),(void)(end) +#define emith_update_cache() /**/ + +#define emith_rw_offs_max() 
0xffffffffU + +#define host_call(addr, args) \ + addr + +#ifdef __x86_64__ + +#define HOST_REGS 16 +#define PTR_SCALE 3 + +#define EMIT_XREX_IF(w, r, rm, rs) do { \ + int xr_ = (r) > 7 ? 1 : 0; \ + int xb_ = (rm) > 7 ? 1 : 0; \ + int xx_ = (rs) > 7 ? 1 : 0; \ + if ((w) | xr_ | xx_ | xb_) \ + EMIT_REX(w, xr_, xx_, xb_); \ +} while (0) + +#define EMIT_REX_IF(w, r, rm) \ + EMIT_XREX_IF(w, r, rm, 0) + +#ifndef _WIN32 + +// SystemV ABI conventions: +// rbx,rbp,r12-r15 are preserved, rax,rcx,rdx,rsi,rdi,r8-r11 are temporaries +// parameters in rdi,rsi,rdx,rcx,r8,r9, return values in rax,rdx +#define PARAM_REGS { xDI, xSI, xDX, xCX, xR8, xR9 } +#define PRESERVED_REGS { xR12, xR13, xR14, xR15, xBX, xBP } +#define TEMPORARY_REGS { xAX, xR10, xR11 } +#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R0,xR15 } + +#define host_arg2reg(rd, arg) \ + switch (arg) { \ + case 0: rd = xDI; break; \ + case 1: rd = xSI; break; \ + case 2: rd = xDX; break; \ + default: rd = xCX; break; \ + } + +#define emith_sh2_drc_entry() do { \ + emith_push(xBX); \ + emith_push(xBP); \ + emith_push(xR12); \ + emith_push(xR13); \ + emith_push(xR14); \ + emith_push(xR15); \ + emith_push(xSI); /* to align */ \ +} while (0) + +#define emith_sh2_drc_exit() do { \ + emith_pop(xSI); \ + emith_pop(xR15); \ + emith_pop(xR14); \ + emith_pop(xR13); \ + emith_pop(xR12); \ + emith_pop(xBP); \ + emith_pop(xBX); \ + emith_ret(); \ +} while (0) + +#else // _WIN32 + +// M$ ABI conventions: +// rbx,rbp,rsi,rdi,r12-r15 are preserved, rcx,rdx,rax,r8,r9,r10,r11 temporaries +// parameters in rcx,rdx,r8,r9, return values in rax,rdx +#define PARAM_REGS { xCX, xDX, xR8, xR9 } +#define PRESERVED_REGS { xSI, xDI, xR12, xR13, xR14, xR15, xBX, xBP } +#define TEMPORARY_REGS { xAX, xR10, xR11 } +#define STATIC_SH2_REGS { SHR_SR,xBX , SHR_R(0),xR15 , SHR_R(1),xR14 } + +#define host_arg2reg(rd, arg) \ + switch (arg) { \ + case 0: rd = xCX; break; \ + case 1: rd = xDX; break; \ + case 2: rd = xR8; break; \ + default: rd = xR9; break; \ + } 
+ +#define emith_sh2_drc_entry() do { \ + emith_push(xBX); \ + emith_push(xBP); \ + emith_push(xR12); \ + emith_push(xR13); \ + emith_push(xR14); \ + emith_push(xR15); \ + emith_push(xSI); \ + emith_push(xDI); \ + emith_add_r_r_ptr_imm(xSP, xSP, -8*5); /* align + args shadow space */ \ +} while (0) + +#define emith_sh2_drc_exit() do { \ + emith_add_r_r_ptr_imm(xSP, xSP, 8*5); \ + emith_pop(xDI); \ + emith_pop(xSI); \ + emith_pop(xR15); \ + emith_pop(xR14); \ + emith_pop(xR13); \ + emith_pop(xR12); \ + emith_pop(xBP); \ + emith_pop(xBX); \ + emith_ret(); \ +} while (0) + +#endif // _WIN32 + +#else // !__x86_64__ + +#define HOST_REGS 8 +#define PTR_SCALE 2 + +#define EMIT_REX_IF(w, r, rm) do { \ + assert((u32)(r) < 8u); \ + assert((u32)(rm) < 8u); \ +} while (0) +#define EMIT_XREX_IF(w, r, rs, rm) do { \ + assert((u32)(r) < 8u); \ + assert((u32)(rs) < 8u); \ + assert((u32)(rm) < 8u); \ +} while (0) + +// MS/SystemV ABI: ebx,esi,edi,ebp are preserved, eax,ecx,edx are temporaries +// DRC uses REGPARM to pass upto 3 parameters in registers eax,ecx,edx. +// To avoid conflicts with param passing ebx must be declared temp here. 
+#define PARAM_REGS { xAX, xDX, xCX } +#define PRESERVED_REGS { xSI, xDI, xBP } +#define TEMPORARY_REGS { xBX } +#define STATIC_SH2_REGS { SHR_SR,xDI , SHR_R0,xSI } #define host_arg2reg(rd, arg) \ switch (arg) { \ case 0: rd = xAX; break; \ case 1: rd = xDX; break; \ case 2: rd = xCX; break; \ + default: rd = xBX; break; \ } -/* SH2 drc specific */ -#define emith_sh2_drc_entry() { \ +#define emith_sh2_drc_entry() do { \ emith_push(xBX); \ emith_push(xBP); \ emith_push(xSI); \ emith_push(xDI); \ -} +} while (0) -#define emith_sh2_drc_exit() { \ +#define emith_sh2_drc_exit() do { \ emith_pop(xDI); \ emith_pop(xSI); \ emith_pop(xBP); \ emith_pop(xBX); \ emith_ret(); \ -} +} while (0) -// assumes EBX is free temporary -#define emith_sh2_wcall(a, tab) { \ +#endif + +#define emith_save_caller_regs(mask) do { \ + int _c; u32 _m = mask & 0xfc7; /* AX, CX, DX, SI, DI, 8, 9, 10, 11 */ \ + if (__builtin_parity(_m) == 1) _m |= 0x8; /* BX for ABI align */ \ + for (_c = HOST_REGS-1; _m && _c >= 0; _m &= ~(1 << _c), _c--) \ + if (_m & (1 << _c)) emith_push(_c); \ +} while (0) + +#define emith_restore_caller_regs(mask) do { \ + int _c; u32 _m = mask & 0xfc7; \ + if (__builtin_parity(_m) == 1) _m |= 0x8; /* BX for ABI align */ \ + for (_c = 0; _m && _c < HOST_REGS; _m &= ~(1 << _c), _c++) \ + if (_m & (1 << _c)) emith_pop(_c); \ +} while (0) + +#define emith_sh2_rcall(a, tab, func, mask) do { \ + int scale_ = PTR_SCALE <= 2 ? 
PTR_SCALE : 2; \ + emith_lsr(mask, a, SH2_READ_SHIFT); \ + if (PTR_SCALE > scale_) emith_lsl(mask, mask, PTR_SCALE-scale_); \ + EMIT_XREX_IF(1, tab, tab, mask); \ + EMIT_OP_MODRM64(0x8d, 0, tab, 4); \ + EMIT_SIB64(scale_+1, mask, tab); /* lea tab, [tab + mask*(2*scale)] */ \ + EMIT_REX_IF(1, func, tab); \ + emith_deref_modrm(0x8b, 0, func, tab); /* mov func, [tab] */ \ + EMIT_REX_IF(0, mask, tab); \ + emith_deref_modrm(0x8b, 1, mask, tab); \ + EMIT(1 << PTR_SCALE, u8); /* mov mask, [tab + {4,8}] */ \ + emith_add_r_r_ptr(func, func); \ +} while (0) + +#define emith_sh2_wcall(a, val, tab, func) do { \ int arg2_; \ host_arg2reg(arg2_, 2); \ - emith_lsr(xBX, a, SH2_WRITE_SHIFT); \ - EMIT_OP_MODRM(0x8b, 0, xBX, 4); \ - EMIT_SIB(2, xBX, tab); /* mov ebx, [tab + ebx * 4] */ \ - emith_move_r_r(arg2_, CONTEXT_REG); \ - emith_jump_reg(xBX); \ -} + emith_lsr(func, a, SH2_WRITE_SHIFT); /* tmp = a >> WRT_SHIFT */ \ + EMIT_XREX_IF(1, func, tab, func); \ + EMIT_OP_MODRM64(0x8b, 0, func, 4); \ + EMIT_SIB64(PTR_SCALE, func, tab); /* mov tmp, [tab + tmp * {4,8}] */ \ + emith_move_r_r_ptr(arg2_, CONTEXT_REG); \ + emith_abijump_reg(func); \ +} while (0) -#define emith_sh2_dtbf_loop() { \ +#define emith_sh2_dtbf_loop() do { \ u8 *jmp0; /* negative cycles check */ \ u8 *jmp1; /* unsinged overflow check */ \ int cr, rn; \ @@ -663,15 +1260,64 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_move_r_imm(rn, 0); \ JMP8_EMIT(ICOND_JA, jmp1); \ rcache_free_tmp(tmp_); \ -} +} while (0) -#define emith_write_sr(sr, srcr) { \ +#define emith_sh2_delay_loop(cycles, reg) do { \ + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); \ + int t1 = rcache_get_tmp(); \ + int t2 = rcache_get_tmp(); \ + int t3 = rcache_get_tmp(); \ + if (t3 == xAX) { t3 = t1; t1 = xAX; } /* for MUL */ \ + if (t3 == xDX) { t3 = t2; t2 = xDX; } \ + /* if (sr < 0) return */ \ + emith_cmp_r_imm(sr, 0); \ + EMITH_JMP_START(DCOND_LE); \ + /* turns = sr.cycles / cycles */ \ + emith_asr(t2, sr, 12); \ + 
emith_move_r_imm(t3, (u32)((1ULL<<32) / (cycles))); \ + emith_mul_u64(t1, t2, t2, t3); /* multiply by 1/x */ \ + rcache_free_tmp(t3); \ + if (reg >= 0) { \ + /* if (reg <= turns) turns = reg-1 */ \ + t3 = rcache_get_reg(reg, RC_GR_RMW, NULL); \ + emith_cmp_r_r(t3, t2); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_sub_r_r_imm_c(DCOND_LS, t2, t3, 1); \ + EMITH_SJMP_END(DCOND_HI); \ + /* if (reg <= 1) turns = 0 */ \ + emith_cmp_r_imm(t3, 1); \ + EMITH_SJMP_START(DCOND_HI); \ + emith_move_r_imm_c(DCOND_LS, t2, 0); \ + EMITH_SJMP_END(DCOND_HI); \ + /* reg -= turns */ \ + emith_sub_r_r(t3, t2); \ + } \ + /* sr.cycles -= turns * cycles; */ \ + emith_move_r_imm(t1, cycles); \ + emith_mul_u64(t1, t2, t1, t2); \ + emith_sub_r_r_r_lsl(sr, sr, t1, 12); \ + EMITH_JMP_END(DCOND_LE); \ + rcache_free_tmp(t1); \ + rcache_free_tmp(t2); \ +} while (0) + +#define emith_write_sr(sr, srcr) do { \ int tmp_ = rcache_get_tmp(); \ emith_clear_msb(tmp_, srcr, 22); \ emith_bic_r_imm(sr, 0x3ff); \ emith_or_r_r(sr, tmp_); \ rcache_free_tmp(tmp_); \ -} +} while (0) + +#define emith_carry_to_t(sr, is_sub) do { \ + emith_rorc(sr); \ + emith_rol(sr, sr, 1); \ +} while (0) + +#define emith_t_to_carry(sr, is_sub) do { \ + emith_ror(sr, sr, 1); \ + emith_rol(sr, sr, 1); \ +} while (0) #define emith_tpop_carry(sr, is_sub) \ emith_lsr(sr, sr, 1) @@ -680,15 +1326,19 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_adc_r_r(sr, sr) /* + * T = carry(Rn = (Rn << 1) | T) * if Q * t = carry(Rn += Rm) * else * t = carry(Rn -= Rm) - * T ^= t + * T = !(T ^ t) */ -#define emith_sh2_div1_step(rn, rm, sr) { \ +#define emith_sh2_div1_step(rn, rm, sr) do { \ u8 *jmp0, *jmp1; \ int tmp_ = rcache_get_tmp(); \ + emith_tpop_carry(sr, 0); /* Rn = 2*Rn+T */\ + emith_adcf_r_r_r(rn, rn, rn); \ + emith_tpush_carry(sr, 0); /* T = C1 */ \ emith_eor_r_r(tmp_, tmp_); \ emith_tst_r_imm(sr, Q); /* if (Q ^ M) */ \ JMP8_POS(jmp0); /* je do_sub */ \ @@ -697,8 +1347,102 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; 
JMP8_EMIT(ICOND_JE, jmp0); /* do_sub: */ \ emith_sub_r_r(rn, rm); \ JMP8_EMIT_NC(jmp1); /* done: */ \ - emith_setc(tmp_); \ - EMIT_OP_MODRM(0x31, 3, tmp_, sr); /* T = Q1 ^ Q2 */ \ + emith_adc_r_r(tmp_, tmp_); \ + emith_eor_r_r(sr, tmp_);/* T = !(C1^C2) */\ + emith_eor_r_imm(sr, T); \ rcache_free_tmp(tmp_); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macl(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* MACH top 16 bits unused if saturated. sign ext for overfl detect */ \ + emith_sext(mh, mh, 16); \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ + /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \ + emith_asr(rn, mh, 15); \ + emith_lsr(rm, mh, 31); \ + emith_addf_r_r(rn, rm); \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ + EMITH_SJMP_START(DCOND_MI); /* sum < 0 -> -ovl */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0xffffffff */ \ + emith_sub_r_imm_c(DCOND_PL, mh, 1); /* 0x00007fff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +/* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* XXX: MACH should be untouched when S is set? */ \ + emith_asr(mh, ml, 31); /* sign ext MACL to MACH for ovrfl check */ \ + EMITH_SJMP_END(DCOND_EQ); \ + emith_mula_s64(ml, mh, rn, rm); \ + emith_tst_r_imm(sr, S); \ + EMITH_SJMP_START(DCOND_EQ); \ + /* overflow if top 33 bits of MACH:MACL aren't all 1 or 0 */ \ + /* to check: add MACL[31] to MACH. 
this is 0 if no overflow */ \ + emith_lsr(rn, ml, 31); \ + emith_addf_r_r(rn, mh); /* sum = MACH + ((MACL>>31)&1) */ \ + EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> overflow */ \ + /* XXX: LSB signalling only in SH1, or in SH2 too? */ \ + emith_move_r_imm_c(DCOND_NE, mh, 0x00000001); /* LSB of MACH */ \ + emith_move_r_imm_c(DCOND_NE, ml, 0x80000000); /* -overflow */ \ + EMITH_SJMP_START(DCOND_MI); /* sum > 0 -> +overflow */ \ + emith_sub_r_imm_c(DCOND_PL, ml, 1); /* 0x7fffffff */ \ + EMITH_SJMP_END(DCOND_MI); \ + EMITH_SJMP_END(DCOND_EQ); \ + EMITH_SJMP_END(DCOND_EQ); \ +} while (0) + +#define emith_pool_check() /**/ +#define emith_pool_commit(j) /**/ +#define emith_insn_ptr() ((u8 *)tcache_ptr) +#define emith_flush() /**/ + +#ifdef T +// T bit handling +#define emith_invert_cond(cond) \ + ((cond) ^ 1) + +static void emith_clr_t_cond(int sr) +{ + emith_bic_r_imm(sr, T); } +static void emith_set_t_cond(int sr, int cond) +{ + EMITH_SJMP_START(emith_invert_cond(cond)); + emith_or_r_imm_c(cond, sr, T); + EMITH_SJMP_END(emith_invert_cond(cond)); +} + +#define emith_get_t_cond() -1 + +#define emith_sync_t(sr) ((void)sr) + +#define emith_invalidate_t() + +static void emith_set_t(int sr, int val) +{ + if (val) + emith_or_r_imm(sr, T); + else + emith_bic_r_imm(sr, T); +} + +static int emith_tst_t(int sr, int tf) +{ + emith_tst_r_imm(sr, T); + return tf ? 
DCOND_NE: DCOND_EQ; +} +#endif diff --git a/cpu/fame/fame.h b/cpu/fame/fame.h index 2f9d8508..ee1140b8 100644 --- a/cpu/fame/fame.h +++ b/cpu/fame/fame.h @@ -9,6 +9,12 @@ #ifndef __FAME_H__ #define __FAME_H__ +// uintptr_t +#include +#ifndef _MSC_VER +#include +#endif + #ifdef __cplusplus extern "C" { #endif @@ -92,14 +98,25 @@ extern "C" { /* Data definition */ /*******************/ +#include + +/* +typedef unsigned char u8; +typedef signed char s8; +typedef unsigned short u16; +typedef signed short s16; +typedef unsigned int u32; +typedef signed int s32; +*/ + typedef union { - unsigned char B; - signed char SB; - unsigned short W; - signed short SW; - unsigned int D; - signed int SD; + u8 B[4]; + s8 SB[4]; + u16 W[2]; + s16 SW[2]; + u32 D; + s32 SD; } famec_union32; /* M68K CPU CONTEXT */ @@ -127,7 +144,7 @@ typedef struct signed int cycles_needed; unsigned short *PC; - unsigned long BasePC; + uintptr_t BasePC; unsigned int flag_C; unsigned int flag_V; unsigned int flag_NotZ; @@ -140,10 +157,16 @@ typedef struct unsigned char not_polling; unsigned char pad[3]; - unsigned long Fetch[M68K_FETCHBANK1]; + uintptr_t Fetch[M68K_FETCHBANK1]; } M68K_CONTEXT; -extern M68K_CONTEXT *g_m68kcontext; +typedef enum +{ + fm68k_reason_emulate = 0, + fm68k_reason_init, + fm68k_reason_idle_install, + fm68k_reason_idle_remove, +} fm68k_call_reason; /************************/ /* Function definition */ @@ -151,12 +174,15 @@ extern M68K_CONTEXT *g_m68kcontext; /* General purpose functions */ void fm68k_init(void); -int fm68k_reset(void); -int fm68k_emulate(int n, int idle_mode); -int fm68k_would_interrupt(void); // to be called from fm68k_emulate() +int fm68k_reset(M68K_CONTEXT *ctx); +int fm68k_emulate(M68K_CONTEXT *ctx, int n, fm68k_call_reason reason); +int fm68k_would_interrupt(M68K_CONTEXT *ctx); // to be called from fm68k_emulate() -unsigned fm68k_get_pc(M68K_CONTEXT *context); +u32 fm68k_get_pc(const M68K_CONTEXT *ctx); +// PICODRIVE_HACK +int fm68k_idle_install(void); +int 
fm68k_idle_remove(void); #ifdef __cplusplus } diff --git a/cpu/fame/fame.html b/cpu/fame/fame.html new file mode 100644 index 00000000..9d9ebfee --- /dev/null +++ b/cpu/fame/fame.html @@ -0,0 +1,1128 @@ + + + +FAME Fast and Accurate Morolora 68000 Emulation Library + + + + +
+
F.A.M.E.
+ Fast and Accurate Motorola 68000 Emulation Library

+ Copyright (c) 2002-2005 Oscar Orallo Peláez / Daniel Lancha García. All rights + reserved.

+ March 14th, 2006
+
+
+

Table of Contents

+
+
+
+

0. Introduction
+ 1. Terms of Use
+ 2. Version History
+
3. What is emulated
+ 4. Using the emulation library
+     4.1. Data structure
+         4.1.1. + CPU context
+         4.1.2. + Memory map definition example
+     4.2. Memory handling
+     4.3. Running the CPU
+ 5. Interrupts and exceptions
+
    5.1. Interrupt + acknowledge
+     5.2. Customizing + processing (HLE)
+     5.3. IRQ lowering
+ 6. Function Reference
+     6.1. General Purpose + Functions
+     6.2. Hardware interrupt + handling functions
+     6.3. CPU context handling + functions
+     6.4. Timing functions
+ 7. Multi-CPU systems
+ 8. Helpful tips
+ 9. Troubleshooting
+ 10. Known bugs
+ 11. Special thanks

+
+
+
+ + + + +
+ 0. Introduction
+

This is the documentation for FAME library, please read it.

+

FAME is an extremely fast and accurate Motorola 68000 Emulation Library. + It is currently available for Intel x86-based systems (80386 or better processor) + and SH-4 based systems.

+

The x86 version was designed to work under any win32 development environment + such as Microsoft Visual Basic, Microsoft Visual C++, Borland Delphi or Borland + C++ Builder.

+

The SH-4 version was specially designed for the Dreamcast videogame console + but it can be used in any SH-4 based system.

+

This manual tries to be a guide to get the emulation library working in your + development environment. I hope you find it useful. If you use FAME in your + project I would like to hear your opinion about it.

+

The package contains one example (C++ program) to show how the library should + be called and used. It was compiled successfully in Microsoft Visual C++ 6.0 + SP5, Borland C++ Builder 5/6 and Borland C++ Compiler 5.5.

+

If you have any questions about how it works in your favorite compiler send + me an email. I'd like to help you with FAME.
+ If you find any bug in FAME, it would be nice that you inform me about that via + email. Any feedback, comments + and suggestions will also be appreciated.

+

How to contact Oscar Orallo:

+
+

E-mail:      oscar@m68k.com
+ Web site:  http://www.m68k.com/fame

+
+

FAME Distribution: http://www.m68k.com/fame/fame.zip + (latest)
+FAME Development Package: http://www.m68k.com/fame/famedev.zip (latest)

+

Here we go folks, have fun :)

+

 

+ + + + +
 1. + Terms of use
+

FAME is a development package that contains the following files:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FileDescription
/x86/win32/fame.dllMicrosoft win32 dynamic link library
/x86/linux/libfame.ax86 static ELF library
/x86/cygwin/libfame.ax86 static win32 library
/sh4/libfame.aSH-4 static ELF library
/delphi/fame.pasDelphi unit
/doc/fame.htmlDocumentation file
/example/main.cC source code example
/example/makefile.cygwinCygwin example makefile
/example/makefile.dcDreamcast example makefile
/example/makefile.linuxLinux example makefile
/example/romdisk/bubble.binMotorola 68000 binary code file
/lib/bc/fame.libBorland C++ 32-bit import library
/lib/vc/fame.expMicrosoft Visual C++ 32-bit export file
/include/fame.hC/C++ header file
+


+ FAME may be distributed freely in unmodified form, as long as this document +file is included.

+

Nothing may be charged for this library. If you want to use it in a shareware + or commercial application, contact me.

+

The author will not be held liable for damages. FAME comes with absolutely + NO WARRANTY. Anyway, I will try to help you with any problem you have using FAME.

+

If you do not agree with all of these terms, please remove FAME from your computer.

+

You are encouraged to contact the author if you wish to use FAME in a commercial + product (to negotiate licensing).

+

Any program which uses FAME must include in its documentation or in the program + itself the following credit text:

+

FAME Motorola 68000 Emulation Library by Oscar Orallo (oscar@m68k.com)

+

 

+ + + + +
 2. + Version History
+
+ + + + + + + + + +
Intel® + 80386 CISC engine
Super H® SH-4 RISC + engine

Version 2.0a (14th March, 2006)

+

- Stupid bug fixed in IRQ management (thanks Martin Kresse).
+ - Accurate DIV timing implemented (thanks Jorge Cwik).
+ - Overflow detection fixed in signed DIV instruction.
+ - Added makefiles for linux and cygwin environments.

+

Version 2.0 (11th January, 2006)

+

- set_irq_type API function removed. An IRQ is now automatically lowered once it has been attended.
+ - Many flag calculations fixed.
+ - Some minor tweaks.

+

Version 1.23 (5th April, + 2005)

+

- Timing fixed in MOVEM instructions.

+

Version 1.22 (7th March, 2005)

+

- Fixed a stupid bug in fetch function.

+

Version 1.21 (19th February, 2005)

+

- set_irq_type function changed for flexible use.
+ - Pointer to data structure parameter removed from memory handlers to + increase throughput.

+

Version 1.2 (17th December, 2004)

+

- Fixed the PC base calculation for fetch memory regions beyond the first + allocated area.
+ - Fixed a tiny error in the status register masking. Several instructions + could generate an invalid value.
+ - Fixed JSR instruction when jumping to a fetch bank different to the + current one.
+ - Tiny error fixed in interrupt acknowledge function parameter.
+ - Fixed the set_context function when setting status register.
+ - Fixed the PC restoring in HLE feature.
+ - Fixed DIVS instruction operation.

+

Version 1.1 (7th October, 2004)

+

- New static ELF library available.
+ - Interrupt acknowledge calling bug fixed.
+ - Custom exception processing (HLE) feature added.
+ - Some little changes in CPU context (register ordering).
+ - Flag N calculation in CHK instruction fixed.
+ - Some little code tweaks.
+ - New sections added to this document.
+ - Some defines added to header file.
+ - Some return values have been changed.
+ - Set/get context functions simplified. Some changes have been applied. +

+

Version 1.0g (2nd August, 2004)

+

- Speed emulation increased once more. The fetch/decode/execute loop + has been inlined.
+ - Some API functions added: add_cycles and release_cycles.
+ - Memory handling section added to this document (thanks Richard Hollstein + for requesting it).
+ - Faster memory access (both program code and data). Memory regions must + be 4 KB aligned now.
+ - Overhead reduced in emulate function calls (entry/exit code + optimized).
+ - Interrupt acknowledge function added.
+ - Some code tweaks here and there.
+ - Static library for Borland C++ compilers added to the package.
+ - DLL file size reduced: internal compression (UPX).
+ - Fixed a bug in STOP instruction: the processor started up after an interrupt + request even if its interrupt level was not higher than current PPL.
+ - Very little optimization in branch instructions.
+

+

Version 1.0f (23rd February, 2003)

+

- API functions added: get_register and set_register + to retrieve and set register values.
+ - Small optimizations for improved speed.
+ - CPU context modified: execinfo added for more complete CPU state handling + support.
+ - LIB file added to package to support implicit linking :).
+ - Changes in documentation.
+ - C header file (fame.h) and Delphi unit (fame.pas) modified.
+

+

Version 1.0e (18th February, 2003)

+

- Emulation core speed increased slightly (faster entry/exit code). Now + the library is pretty fast.
+ - Some changes in function and variable naming (odometer changed to cycles_counter).
+

+

Version 1.0d (20th December, 2002)

+

- Fetch function speed incremented a bit.
+ - Fixed memory boundary for byte data accesses.
+

+

Version 1.0c (27th August, 2002)

+

- Fixed a stupid bug in group 0 exceptions management.
+

+

Version 1.0b (16th August, 2002)

+

- Many errors corrected in documentation about memory mapping.
+ - The function fetch has now capability to access to the data + address space.

+


+ Version 1.0a (24th July, 2002)

+

- First public release.

Version 2.0a (14th March, 2006)

+

- Timing fixed for DIV and signed MUL instructions.
+ - Improved overflow detection in signed DIV instruction.
+ - Added makefile for Dreamcast system (requires KOS).

+

Version 2.0 (11th January, 2006)

+

- Tons of bugs fixed (thanks Chui).
+ - Accurate DIV timing implemented (thanks Jorge Cwik).
+ - set_irq_type API function removed. An IRQ is now automatically lowered once it has been attended.
+ - Great speed improvements.

+

Version 0.04 (5th April, 2005)

+

- Lightweight entry/exit code.
+ - Fixed sign/zero flag calculation when moving long data from memory to + memory.
+ - Privilege violation exception fixed.
+ - Faster interrupt/exception management.
+ - Timing fixed in MOVEM instructions.

+

Version 0.03 (7th March, 2005)

+

- Sign flag calculation in immediate logical instruction fixed.
+ - Carry flag calculation in NEG instruction fixed.
+ - Overflow flag in operations with X flag fixed.
+ - Fixed CPU state stop bit.
+ - Speed up by about 20%.
+ - Tiny tweaks here and there and everywhere.

+

Version 0.02 (19th February, 2005)

+

- First beta release.
+ - set_irq_type function changed for flexible use.
+ - Greatly improved internal memory management.
+ - Pointer to data structure parameter removed from memory handlers to + increase throughput.
+ - DIV/DIVS instructions fixed.
+ - ABCD/SBCD adjusted result fixed.
+ - MOVEM (control addressing mode) instruction fixed.
+ - BTST with memory addressing mode fixed.
+ - Fixed Z flag calculation in NEGX instruction.
+ - Fixed TAS instruction.
+ - Fixed RESET instruction (external handler calling).
+ - Fixed ILLEGAL instruction (exception generation).
+ - Quick ADD to address register fixed.
+ - EXG instruction fixed.
+ - V flag calculation fixed in ASL instruction.
+ - Some tiny tweaks & improvements.

+

Version 0.01b (17th December, 2004)

+

- Memory map cache generation fixed.
+ - Fixed the PC restoring in HLE feature.
+ - Many opcodes fixed.
+ - Lots of bugs fixed.

+

Version 0.01a (7th October, 2004)

+

- First public release. Alpha development state!

+

 

+ + + + +
 3. + What is emulated
+

This library emulates the Motorola 68000 microprocessor. The main emulation + features are the following:

+
    +
  • +
     Written in 100% 32-bit assembly language.
    +
  • +
  • +
     Support for all opcodes.
    +
  • +
  • +
     Calculates 100% of flags correctly, + even undocumented ones.
    +
  • +
  • +
     Excellent accurate timing emulation for all opcodes. All instructions + have perfect timing emulation according to Motorola references. Take a look at Motorola manuals for more information about this + fact.
    +
  • +
  • +
     Complete hardware interrupt support.
    +
  • +
  • +
     Accurate exception support allowing an appropriate emulation of home + computer systems.
    +
  • +
  • +
     Priorities between interrupts and exceptions are fully emulated.
    +
  • +
+

 

+ + + +
 4. + Using the emulation library
+

4.1. Data structure

+

The data structures used in the emulation core are defined in the C header file + fame.h. In this file you will find all the data structures needed to use the + library.
+
+ If you cannot use this file because you are not using a C/C++ compliant compiler + you have to define these structures yourself in your code.

+

Here I describe these data structures.

+
+

struct M68K_PROGRAM
+ {
+     unsigned low_addr;
+     unsigned high_addr;
+     unsigned offset;
+ }

+
+

This structure defines the memory regions for 68000 program code. The fields + low_addr and high_addr are 32-bit values used to determine + the low and high addresses of the memory block in the 68000 memory map.

+

The last field is a 32-bit pointer to the data of the memory region. The data + pointed by it must be allocated in native (Motorola) format. If not, the data + will be fetched incorrectly. Make sure of this fact.

+
+

struct M68K_DATA
+ {
+     unsigned low_addr;
+     unsigned high_addr;
+     void *mem_handler;
+     void *data;
+ }

+
+

This one is used for 68000 data code. This structure has an appearance very + similar to the last one but has a difference in the way you can give the control + of the memory to FAME. The pointer called mem_handler is a function + pointer. This pointer is used for memory management, so when you want to take + control in the reading/writing of a memory region, you have to set this pointer + to the appropriate value. If you do not want to use this functionality you have + to set this pointer to NULL and set data pointing to the data itself. + The different ways to perform memory handling will be described in more detail + in the memory handling section.

+

4.1.1. CPU + context

+
+

struct M68K_CONTEXT
+ {
+     struct M68K_PROGRAM *fetch;
+     struct M68K_DATA *read_byte;
+     struct M68K_DATA *read_word;
+     struct M68K_DATA *write_byte;
+     struct M68K_DATA *write_word;
+     struct M68K_PROGRAM *sv_fetch;
+     struct M68K_DATA *sv_read_byte;
+     struct M68K_DATA *sv_read_word;
+     struct M68K_DATA *sv_write_byte;
+     struct M68K_DATA *sv_write_word;
+     struct M68K_PROGRAM *user_fetch;
+     struct M68K_DATA *user_read_byte;
+     struct M68K_DATA *user_read_word;
+     struct M68K_DATA *user_write_byte;
+     struct M68K_DATA *user_write_word;
+     void (*reset_handler)(void);

+     void (*iack_handler)(unsigned level);
+     unsigned *icust_handler;
+     unsigned dreg[8];
+     unsigned areg[8];
+     unsigned asp;
+     unsigned pc;
+     unsigned cycles_counter;
+     unsigned char interrupts[8];
+     unsigned short sr;
+     unsigned short execinfo;
+ }

+
+

This structure defines a CPU context. You have to declare a variable of this + type. It contains all information related with the context of the CPU.

+

You have to set pointer values of sv* + which defines the supervisor memory map. In order to get the CPU into user mode, + set the user* + pointers.

+

The pointer reset_handler is called when the RESET instruction is + executed. In this way, you can reset all external devices in the calling to + this function. If you do not want to use this feature remember to set this pointer + to NULL.

+

The pointer iack_handler is called whenever a hardware interrupt is + handled by the CPU. This feature will be covered later in Interrupts + and exceptions section.

+

The pointer icust_handler is intended to point to an array of function + pointers to handle customized interrupt/exception processing (known as High + Level Emulation or HLE for short). See Interrupts and + exceptions section to set up this feature.

+

The rest of the structure is managed by FAME so you can read it in execution + time to retrieve information about the CPU.

+

Here I describe some interesting fields for the 68000 programmer:

+
+

- dreg[8] + holds the eight data registers in order (d0 - d7).
+ - areg[8] + holds the eight address registers in order (a0 - a7).
+ - pc + is the current PC address.
+ - asp + stands for Alternative Stack Pointer. It is used to store the not + currently used stack pointer. In supervisor mode, asp is the user stack pointer, + in user mode it is the supervisor stack pointer.
+ - cycles_counter + holds the number of cycles executed so far.
+ - interrupts + is an array that contains information about interrupts.
+ - sr + is the status register.

+
+

4.1.2 Memory map definition + example

+

As an example of an address space definition, consider the following simple + memory map:

+
    +
  •  ROM: 000000-01FFFF
  • +
  •  RAM-1: 300000-407FFF
  • +
  •  RAM-2: 500000-50FFFF
  • +
  •  RAM-3: 600000-601FFF
  • +
  •  RAM-4: 800000-80AFFF
  • +
+

This is the structure for the program address space. I will suppose that ROM, + RAM-1 and RAM-2 contains program code.

+
+

struct M68K_PROGRAM prg_fetch[] + = {
+     {0x000000, 0x01FFFF, (unsigned)rom},
+     {0x300000, 0x407FFF, (unsigned)ram1 - 0x300000},
+     {0x500000, 0x50FFFF, (unsigned)ram2 - 0x500000},
+     {-1, -1, NULL}
+ }

+
+

Note that the last entry must be {-1, + -1, NULL}.

+

Now, I will set up the data address space. In this case, I will suppose that + all memory areas will be accessed and that RAM-3 is accessed by the routine mem_access. + To do this, you will have to set up the following:

+

- One structure for read byte operations:

+
+

struct M68K_DATA data_rb[] + = {
+     {0x000000, 0x01FFFF, NULL, rom},
+     {0x300000, 0x407FFF, NULL, ram1 - 0x300000},
+     {0x500000, 0x50FFFF, NULL, ram2 - 0x500000},
+     {0x600000, 0x601FFF, mem_access, NULL},
+     {0x800000, 0x80AFFF, NULL, ram4 - 0x800000},
+     {-1, -1, NULL, NULL}
+ }

+
+

- One structure for write byte operations:

+
+

struct M68K_DATA data_wb[] + = {
+     {0x000000, 0x01FFFF, NULL, rom},
+     {0x300000, 0x407FFF, NULL, ram1 - 0x300000},
+     {0x500000, 0x50FFFF, NULL, ram2 - 0x500000},
+     {0x600000, 0x601FFF, mem_access, NULL},
+     {0x800000, 0x80AFFF, NULL, ram4 - 0x800000},
+     {-1, -1, NULL, NULL}
+ }

+
+

- One structure for read word operations:

+
+

struct M68K_DATA data_rw[] + = {
+     {0x000000, 0x01FFFF, NULL, rom},
+     {0x300000, 0x407FFF, NULL, ram1 - 0x300000},
+     {0x500000, 0x50FFFF, NULL, ram2 - 0x500000},
+     {0x600000, 0x601FFF, mem_access, NULL},
+     {0x800000, 0x80AFFF, NULL, ram4 - 0x800000},
+     {-1, -1, NULL, NULL}
+ }

+
+

- One structure for write word operations:

+
+

struct M68K_DATA data_ww[] + = {
+     {0x000000, 0x01FFFF, NULL, rom},
+     {0x300000, 0x407FFF, NULL, ram1 - 0x300000},
+     {0x500000, 0x50FFFF, NULL, ram2 - 0x500000},
+     {0x600000, 0x601FFF, mem_access, NULL},
+     {0x800000, 0x80AFFF, NULL, ram4 - 0x800000},
+     {-1, -1, NULL, NULL}
+ }

+
+

In the example, the routine used for access to ram3 area is the same in all + the structures defined but it could be different.

+

And now the last step is to fill the CPU context with the defined address spaces. + This is accomplished in the following way:

+
+

struct M68K_CONTEXT cpu_contxt;

+

cpu_contxt.sv_fetch = prg_fetch;
+ cpu_contxt.user_fetch = prg_fetch;

+

cpu_contxt.sv_read_byte + = data_rb;
+ cpu_contxt.user_read_byte = data_rb;
+ cpu_contxt.sv_read_word = data_rw;
+ cpu_contxt.user_read_word = data_rw;
+ cpu_contxt.sv_write_byte = data_wb;
+ cpu_contxt.user_write_byte = data_wb;
+ cpu_contxt.sv_write_word = data_ww;
+ cpu_contxt.user_write_word = data_ww;

+
+

Note that the memory address spaces for supervisor and user are the same. This + is very common but remember they could be different.

+

And that is all.
+

+

4.2. Memory handling

+

The emulation library provides two ways to perform the access to the memory + map: built-in and custom.

+

The built-in memory handling is ideal for getting the maximum speed when accessing the memory + map, but at the cost of less control. To use it you have to set data + pointing to the beginning of the native memory region and set mem_handler + to NULL.

+
+

struct M68K_DATA
+ {
+     unsigned low_addr;
+     unsigned high_addr;
+     void *mem_handler;
+     void *data;
+ }

+
+

The custom memory handling gives you total control over memory accesses but + its use could create a bottleneck in the emulated system if it is used inappropriately. + To use this feature you have to set up mem_handler pointer to the handling + function. That function will be called whenever a memory access is done.
+ There is a restriction in the definition of a memory region: it must be 4 + KB aligned. So it must start on 0XXX000h and end on 0YYYFFFh.

+

Memory handling functions have the following structure:

+

int  read_xxxx (int address);
+ void write_xxxx(int address, int data);

+

where xxxx stands for byte, word or long depending on data size, + address is the memory address accessed and data is the data itself.

+

Using memory handling functions might be a good way to customize emulated memory + space. You have to read/write data in the way FAME expects. This process could + become confusing. To avoid undesired problems at this point, I have written + some simple routines to make your life easier:

+
+

int readbyte(int address)
+ {
+     return ram[address^1];
+ }

+
+ int readword(int address)
+ {
+     return ((unsigned short *)ram)[address>>1]; +
+ }
+

+ void writebyte(int address, + int data)
+ {
+     ram[address^1] = data & 0xFF;
+ }

+
+ void writeword(int address, + int data)
+ {
+     ((unsigned short *)ram)[address>>1] = data & + 0xFFFF;
+ }

+
+

I am assuming you have your emulated memory region (pointed to by ram here) + in native endian format (that is, big endian for the 68000 processor). Note + the required endianness switch in the byte accesses, since we are reading on + a little endian machine (x86 and SH4 processors).

+


+ 4.3. Running the CPU

+

In order to get the 68000 CPU running, you have to do the following steps:

+
+

1. Initialize the emulation library. Call m68k_init() + to perform this task.
+ 2. Set up the memory map (see section 4.1.2).
+ 3. Reset the processor calling the m68k_reset() + function.
+ 4. Execute code calling m68k_emulate(n) + function where the parameter n means the number of clock cycles to execute.

+
+

Note: See Function Reference section + for more information about how API functions work.

+

 

+ + + + +
 5. + Interrupts and exceptions
+

The library currently emulates the group 0 exceptions (address + error and bus error), group 1 exceptions (trace mode, external + interrupts, illegal opcode and privilege violation) and group 2 exceptions.

+

The reset exception is not emulated. This is due to performance + reasons. If this exception were emulated, the performance of the library would + fall notably. If you need this exception to be emulated, contact me.

+

Hardware interrupts can be raised at any time, but they will be attended only in the entry code of the emulate function. To manage interrupts, please refer to the Function Reference section below.

+

If you have any doubt about how these events work, I recommend you to take a look at M68000 Microprocessors User's Manual (english) or at the book Sistemas Digitales (spanish).

+

5.1. Interrupt + acknowledge

+

Sometimes it could be useful to be notified when a hardware interrupt is being attended. + This feature is frequently called interrupt acknowledging and allows + you to take specific actions when an interrupt is handled, signaling a device + to lower the interrupt request, for example.
+ This function accepts one parameter, the interrupt level, and returns no value.

+
+

void iackhandler(unsigned + int_level);

+
+

Once you have defined your function, set up the CPU context:

+
+

struct M68K_CONTEXT cpu_contxt;

+

cpu_contxt.iack_handler + = iackhandler;
+ m68k_set_context(&cpu_contxt);

+
+

If you do not need this feature, set this pointer to NULL to avoid undesired + results.

+

5.2. + Customizing processing (HLE)

+

Sometimes it is needed to trap an exception to perform some native tasks overriding + target system tasks (system BIOS calls, for example).

+

To customize interrupt and exception processing use icusthandler table + pointer. This pointer must point to a table of a total of 256 function pointers, + each one handling each vector exception presented in the 68000 system starting + from address $000000. The index of the table is the vector number.

+

The handling function accepts one parameter, the vector exception number, and + returns no value.

+
+

void icusthandler(unsigned + vector);

+
+

The array of pointers could be used in this fashion:

+
+

/* Function to customize + CHK exception */
+ void chk_handler(unsigned vector)
+ {
+     . . .
+     (some actions)
+     . . .
+ }

+

unsigned fpa[256];               /* + Function Pointer Array declaration */
+ struct M68K_CONTEXT
+ cpu_context;
+
+ fpa[6] = chk_handler;            /* + Customizing CHK exception */
+ cpu_context.icust_handler = fpa;  /* Setting up function pointers */

+
+

Take into account the + following when you use this feature:

+
    +
  •  Remember to set to NULL those exceptions you do not want to be customized + in the array of function pointers.
  • +
  • +
     Group 0 exceptions are a special type of exception. Since they are + raised when something has gone seriously wrong with the system, they can not + be customized.
    +
  • +
+

If you do not need this feature, set this pointer to NULL to avoid undesired + results.

+

5.3. IRQ lowering

+

Every IRQ will be automatically lowered once it has been attended. User selectable IRQ lowering type has been removed.

+

 

+ + + + +
 6. + Function Reference
+

This is a brief description of the library functions.

+
    +
  •  For C/C++ programmers: +

    - They are declared in fame.h to include in your C/C++ application. You + can take a look at the sample program included.

    +
  • +
  •  For Delphi programmers: +

    - Copy fame.pas and fame.dll into your project's directory. Add fame.pas + to your project.

    +
  • +
+


+ Remember that this is a brief overview. If you do not find answers to your questions, + contact me.

+


+ 6.1. General Purpose Functions
+

+- void m68k_init (void) +
+

This function initializes the emulation library. It must be called before any other + library function.
+

+- unsigned m68k_reset (void) +
+

Resets the CPU. You must set up the memory map before calling this function.

+

Return values:

+
+

M68K_OK (0): Success.
+ M68K_RUNNING (1): The function fails because the CPU is + running. Stop the CPU first.
+ M68K_NO_SUP_ADDR_SPACE (2): The CPU could not be reset + because there is no supervisor memory map for opcode fetching.
+

+
+- void m68k_emulate (int n) +
+

Starts the emulation and executes n clock cycles. This is the function you + have to call to execute 68000 code. The number of elapsed CPU cycles is the + lowest number equal to or greater than n.

+


+- unsigned m68k_get_pc (void)

+
+

Returns the current PC address. The value returned by this function does not + have to be equal to the beginning of an instruction.
+

+- unsigned m68k_get_cpu_state (void) +
+

Returns information about the CPU current state. It could be called at any + time to retrieve interesting and useful information about the CPU state.

+

The data returned has the following format:

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
BitsMeaning
0Internal use. Should be zero.
1Processing a group 0 exception (address error or bus error).
2Double bus fault has happened.
3Trace mode is being processed.
4Processing trace mode exception.
5Processing bus error exception.
6Processing address error exception.
7CPU stopped by the STOP instruction.
8-31Reserved for future use. Should be zero.
+
+

 

+- int m68k_fetch(unsigned address, unsigned memory_space) +
+

Fetches the word pointed by the specified address using the given memory space + from the fetch memory array. The memory space means the following:

+
    +
  •  Supervisor address space (M68K_SUP_ADDR_SPACE)
  • +
  •  User address space (M68K_USER_ADDR_SPACE)
  • +
  •  Data address space (M68K_DATA_ADDR_SPACE)
  • +
  •  Program address space (M68K_PROG_ADDR_SPACE)
  • +
+

Generally, you will want to fetch a word from a memory map joining two of those + primitive types. For example:

+
+

Supervisor Data Address Space (Supervisor & Data)

+
+

To accomplish this, you have to use a bitwise OR operation:

+
+

M68K_SUP_ADDR_SPACE | M68K_DATA_ADDR_SPACE

+
+

Return value:

+
+

FFFFFFFFh: The address specified is out of bounds in the + given
+ memory space.
+ 0000xxxxh: The fetched word.

+
+


+ 6.2 + Hardware interrupt handling functions
+

+- int m68k_raise_irq (int level, int vector) +
+

This function allows you to generate a hardware interrupt. This event is external + to the CPU and generally activated by an external device. The possible values + for the parameter level are between 1 and 7, both inclusive.

+

For vector the values are the following:

+
+

M68K_AUTOVECTORED_IRQ (-1): Autovectored interrupt.
+ M68K_SPURIOUS_IRQ (-2): Spurious interrupt.
+ 0-255: Vector number.

+
+

Return value:

+
+

M68K_OK (0): Success.
+ M68K_INT_LEVEL_ERROR (-1): The function fails because + there is another interrupt activated at the given level.
+ M68K_INT_INV_PARAMS (-2): Invalid parameter values. The vector + value is not valid or the level is equal to zero.

+
+


+int m68k_lower_irq (int level)

+
+

This function is used to deactivate an interrupt.

+

Return value:

+
+

M68K_OK (0): The interrupt has been deactivated successfully.
+ M68K_IRQ_LEVEL_ERROR (-1): The function fails because + the interrupt is not activated.
+ M68K_IRQ_INV_PARAMS (-2): Invalid interrupt level value.

+
+


+- int m68k_get_irq_vector (int level)

+
+

Calling this function you will get the vector of a generated interrupt at the + given interrupt level.

+

Return value:

+
+

> -1: Requested interrupt vector.
+ M68K_IRQ_LEVEL_ERROR (-1): The function fails because + the interrupt is not activated.
+ M68K_IRQ_INV_PARAMS (-2): Invalid interrupt level.
+

+
+- int m68k_change_irq_vector (int level, int vector) +
+

It allows you to change the vector of a generated interrupt. Remember that + the interrupt must be already activated when you call this function.
+ The possible values for vector are between 0 and 255, both inclusive.

+

Return value:

+
+

M68K_OK (0): Success.
+ M68K_IRQ_LEVEL_ERROR (-1): The interrupt at the given vector + was not activated.
+ M68K_IRQ_INV_PARAMS (-2): Invalid interrupt vector value.

+
+


+ 6.3. + CPU context handling functions
+

+

These functions are intended for handling the CPU context.
+

+- int m68k_get_context_size (void) +
+

Returns the size in bytes of the CPU context.
+

+- void m68k_get_context (void *context) +
+

Fills the context pointed to by the pointer with the current CPU context. You + must reserve memory space in order to store the CPU context.
+

+- void m68k_set_context (void *context) +
+

Allows you to set up the CPU context. The parameter is a pointer to the context + structure.
+

+- int m68k_get_register (m68k_register reg) +
+

Returns the value of the specified register. If the value of the reg parameter + is not valid, the function will return -1.

+

Note that the value returned by the function when the register specified is + not valid (-1) is a valid 32-bit register value. This may be cause for concern.
+

+- int m68k_set_register (m68k_register reg, unsigned value) +
+

Sets the value of the specified register.

+

Return values:

+
+

M68K_OK (0): Success.
+ M68K_INV_REG (-1): The register specified is not valid.

+
+


+ 6.4. Timing functions

+

These functions allow you to control the CPU cycles executed in the emulation. + This way, you can adjust the emulation speed. The cycles_counter + is the variable used in the library to count the CPU cycles. For each call + to the emulate function, the executed CPU cycles are added to cycles_counter.
+

+- unsigned m68k_get_cycles_counter (void) +
+

Returns the current value of the cycles_counter.
+

+- unsigned m68k_trip_cycles_counter (void) +
+

Returns the current value of the cycles_counter variable and resets + it to zero.

+
+- unsigned m68k_control_cycles_counter (int n) +
+

If the parameter n is equal to zero, the function returns the cycles_counter.
+ Otherwise, it returns the cycles_counter resetting it to zero.
+

+- void m68k_release_timeslice (void) +
+

Calling this function you will request the CPU to finish its execution as soon + as possible. The premature exit will be reflected in the cycles_counter.
+

+

- void m68k_add_cycles (int cycles)

+
+

Call this function when you want to increase the clock cycles counting (cycles_counter + variable).
+ This function could be useful when emulating systems equipped with DMA capabilities, + keeping track of how many clock cycles the CPU was frozen by any device doing + a DMA operation.
+

+

- void m68k_release_cycles (int cycles)

+
+

Call this function when you want to decrease the clock cycles counting (cycles_counter + variable).

+

 

+ + + + +
 7. + Multi-CPU systems
+

Emulating multiple 68000 processors is fairly simple. If you want to emulate + more than one 68000 processor, you have to set up a CPU context and a memory + map for each one (see memory map example).

+

For example, you would do this:

+
+

struct M68K_CONTEXT my_contexts[NUMBER_OF_PROCESSORS];

+

for (int i = 0; i < NUMBER_OF_PROCESSORS; + i++)
+ {
+     m68k_set_context(&my_contexts[i]);
+     m68k_emulate(100);
+     m68k_get_context(&my_contexts[i]);
+ }

+
+

Try to compensate for the overhead due to the copying of the contexts by emulating + the CPUs in large timeslices.

+

FAME is non-reentrant so you cannot multi-thread several processors. If you + need FAME running in this way, contact me.

+

 

+ + + + +
 8. + Helpful tips
+

- It is recommended to use built-in memory handlers as much as possible because + they should be much faster than others coded into high level languages.

+

- Use timeslices as large as possible because this way you will reduce the overhead + produced by the entry and exit code of the library.

+

- Try to avoid context swapping. It will reduce performance notably.

+

- It is a good idea to call the emulate function with a variable + number of cycles instead of a fixed one. Keep track of how many cycles overflowed + from the last call to emulate and subtract them in the next call:

+
+

#define CPU_TIMESLICE 100

+

cpu_context.cycles_counter = 0;
+ while(!done)
+ {
+     if (cpu_context.cycles_counter < CPU_TIMESLICE)
+     {
+         m68k_emulate(CPU_TIMESLICE + - cpu_context.cycles_counter);
+     }
+     cpu_context.cycles_counter -= CPU_TIMESLICE;
+ }

+
+

- Library routines were designed with accuracy and speed in mind. Use them + as much as possible in order to reach a fast and accurate emulated system.

+

- The object code contains many symbols for program relocation. Strip your executable when you are done.

+

 

+ + + + +
 9. + Troubleshooting
+

This section tries to help you to get the library working correctly. I hope + you find this section useful.

+

- Remember to call the init function before any other library function. + It initializes the library, setting up the emulator.

+

- You must call reset function before starting the emulation in order + to get the library working appropriately.

+

- Set up your memory map before resetting the CPU. The reset function looks up + the vector table.

+

- Ensure that the CPU context has been set correctly after the call to + set_context.

+

- Check if memory maps are well-constructed. Every memory region must be 4 + KB aligned. This is a common pitfall.

+

- Check if your emulated processor is accessing memory correctly, especially + when you have to use memory handling functions. Take a look at the Memory + handling section if you are having problems at this point.

+

- Make sure to set reset_handler, iack_handler and icust_handler + to NULL if you are not using these features. It would be a good idea to set + every byte of a new context to zero to avoid any problem.

+

- Remember to set every handler not used in the array of function pointers + (icust_handler) to NULL to avoid undesired results.

+

- Remember to include fame.h in any C module that uses FAME. This header + file is subject to change in future versions.

+

- Make sure to instruct your compiler configuration to treat enum types as + 32-bit ints when using m68k_get_register + and m68k_set_register functions.

+

 

+ + + + +
 10. + Known bugs
+

- The bit I/N (specific information about the processor activity) saved on + the supervisor stack when an address or bus error happens is not calculated + and its value is fixed to one (instruction). This tiny detail will be implemented + in future versions if needed.

+

 

+ + + + +
 11. + Special thanks
+

Many thanks go out to those who helped me out with this library or contributed + to the project in any form in no special order.

+

- Chui for his invaluable work to get this thing up into his NeoGeo + emulator (Neo4All) and for helping me to fix loads of errors.
+ - Bart Trzynadlowski (trzynadl@unr.nevada.edu) + for his notes about 68000 undocumented behavior.
+ - Julio César Álvarez Acosta (julio_a_a@yahoo.es) + for his help to build the import library.
+ - Richard Hollstein for letting me know that memory handling functions + were not documented in previous releases.
+ - Jorge Cwik for figuring out the algorithm to calculate the exact number of cycles in DIV instructions.
+ - Neill Corlett for his excellent Starscream 680x0 emulation library + which gave me a lot of insight and ideas on CPU emulation.
+ - Stéphane Dallongeville for Gens (probably the best Genesis/Mega + Drive emulator ever programmed) and for giving me his opinion about several + aspects of 68000 emulation.
+ - BlackAura and Ian Micheal for telling me about the high + level emulation (HLE) feature.
+ - Juan Carlos Hernández Martín (jmartin@uax.es) + for his interest in this project.
+ - Antonio García Guerra for his great book Sistemas Digitales.
+ - The creators of the 68000 microprocessor, because without their work none + of this would be a reality.
+
+ Thank you too! for your interest in the library. If you have any suggestions, +comments or contributions do not hesitate to get in contact with me.

+

Have a nice day!

+ + diff --git a/cpu/fame/famec.c b/cpu/fame/famec.c index 2f10540d..b0ee0f07 100644 --- a/cpu/fame/famec.c +++ b/cpu/fame/famec.c @@ -24,7 +24,7 @@ #define FAMEC_CHECK_BRANCHES #define FAMEC_EXTRA_INLINE // #define FAMEC_DEBUG -// #define FAMEC_NO_GOTOS +#define FAMEC_NO_GOTOS #define FAMEC_ADR_BITS 24 // #define FAMEC_FETCHBITS 8 #define FAMEC_DATABITS 8 @@ -35,7 +35,17 @@ #define PICODRIVE_HACK // Options // - +#ifndef FAMEC_NO_GOTOS +// computed gotos is a GNU extension +#ifndef __GNUC__ +#define FAMEC_NO_GOTOS +#endif +// as of 3.3, clang takes over 3h to compile this in computed goto mode.. +#ifdef __clang__ +#define FAMEC_NO_GOTOS +#endif +#endif + #undef INLINE #ifdef _MSC_VER #define INLINE @@ -50,51 +60,6 @@ #define FAMEC_EXTRA_INLINE INLINE #endif -#ifdef u8 -#undef u8 -#endif - -#ifdef s8 -#undef s8 -#endif - -#ifdef u16 -#undef u16 -#endif - -#ifdef s16 -#undef s16 -#endif - -#ifdef u32 -#undef u32 -#endif - -#ifdef s32 -#undef s32 -#endif - -#ifdef uptr -#undef uptr -#endif - -#define u8 unsigned char -#define s8 signed char -#define u16 unsigned short -#define s16 signed short -#define u32 unsigned int -#define s32 signed int -#define uptr unsigned long - -/* -typedef unsigned char u8; -typedef signed char s8; -typedef unsigned short u16; -typedef signed short s16; -typedef unsigned int u32; -typedef signed int s32; -*/ - #ifndef M68K_OK #define M68K_OK 0 #endif @@ -218,21 +183,29 @@ typedef signed int s32; // internals core macros ///////////////////////// -#define DREG(X) (m68kcontext.dreg[(X)].D) -#define DREGu32(X) (m68kcontext.dreg[(X)].D) -#define DREGs32(X) (m68kcontext.dreg[(X)].SD) -#define DREGu16(X) (m68kcontext.dreg[(X)].W) -#define DREGs16(X) (m68kcontext.dreg[(X)].SW) -#define DREGu8(X) (m68kcontext.dreg[(X)].B) -#define DREGs8(X) (m68kcontext.dreg[(X)].SB) +// helper macros +#define BITCOUNT(r,v) \ + (r = (v) - (((v)>>1)&0x55555555), r = (r&0x33333333) + ((r>>2)&0x33333333), \ + r = (((r + (r>>4))&0x0f0f0f0f) * 0x01010101)>>24) 
-#define AREG(X) (m68kcontext.areg[(X)].D) -#define AREGu32(X) (m68kcontext.areg[(X)].D) -#define AREGs32(X) (m68kcontext.areg[(X)].SD) -#define AREGu16(X) (m68kcontext.areg[(X)].W) -#define AREGs16(X) (m68kcontext.areg[(X)].SW) +#define XB MEM_LE4(0) +#define XW MEM_LE2(0) -#define ASP (m68kcontext.asp) +#define DREG(X) (ctx->dreg[(X)].D) +#define DREGu32(X) (ctx->dreg[(X)].D) +#define DREGs32(X) (ctx->dreg[(X)].SD) +#define DREGu16(X) (ctx->dreg[(X)].W[XW]) +#define DREGs16(X) (ctx->dreg[(X)].SW[XW]) +#define DREGu8(X) (ctx->dreg[(X)].B[XB]) +#define DREGs8(X) (ctx->dreg[(X)].SB[XB]) + +#define AREG(X) (ctx->areg[(X)].D) +#define AREGu32(X) (ctx->areg[(X)].D) +#define AREGs32(X) (ctx->areg[(X)].SD) +#define AREGu16(X) (ctx->areg[(X)].W[XW]) +#define AREGs16(X) (ctx->areg[(X)].SW[XW]) + +#define ASP (ctx->asp) #define LSL(A, C) ((A) << (C)) #define LSR(A, C) ((A) >> (C)) @@ -255,45 +228,51 @@ typedef signed int s32; #define ROR_33(A, C) (LSR_32(A, C) | LSL_32(A, 33-(C))) #ifndef FAMEC_NO_GOTOS -#define NEXT \ +#define NEXT { \ FETCH_WORD(Opcode); \ - goto *JumpTable[Opcode]; + goto *JumpTable[Opcode]; \ +} #ifdef FAMEC_ROLL_INLINE -#define RET(A) \ - m68kcontext.io_cycle_counter -= (A); \ - if (m68kcontext.io_cycle_counter <= 0) goto famec_Exec_End; \ - NEXT +#define RET(A) { \ + ctx->io_cycle_counter -= (A); \ + if (ctx->io_cycle_counter <= 0) goto famec_Exec_End; \ + NEXT \ +} #else -#define RET(A) \ - m68kcontext.io_cycle_counter -= (A); \ - if (m68kcontext.io_cycle_counter <= 0) goto famec_Exec_End; \ - goto famec_Exec; +#define RET(A) { \ + ctx->io_cycle_counter -= (A); \ + if (ctx->io_cycle_counter <= 0) goto famec_Exec_End; \ + goto famec_Exec; \ +} #endif -#define RET0() \ - m68kcontext.io_cycle_counter = -6; \ - goto famec_End; +#define RET0() { \ + ctx->io_cycle_counter = -6; \ + goto famec_End; \ +} #else #define NEXT \ - do{ \ - FETCH_WORD(Opcode); \ - JumpTable[Opcode](); \ - }while(m68kcontext.io_cycle_counter>0); + do { \ + FETCH_WORD(Opcode); \ + 
JumpTable[Opcode](ctx); \ + } while (ctx->io_cycle_counter > 0); -#define RET(A) \ - m68kcontext.io_cycle_counter -= (A); \ - return; +#define RET(A) { \ + ctx->io_cycle_counter -= (A); \ + return; \ +} -#define RET0() \ - m68kcontext.io_cycle_counter = -6; \ - return; +#define RET0() { \ + ctx->io_cycle_counter = -6; \ + return; \ +} #endif -#define M68K_PPL (m68kcontext.sr >> 8) & 7 +#define M68K_PPL (ctx->sr >> 8) & 7 #define GET_PC \ (u32)((uptr)PC - BasePC) @@ -311,7 +290,7 @@ typedef signed int s32; { \ u32 pc = A; \ FORCE_ALIGNMENT(pc); \ - BasePC = m68kcontext.Fetch[(pc >> M68K_FETCHSFT) & M68K_FETCHMASK]; \ + BasePC = ctx->Fetch[(pc >> M68K_FETCHSFT) & M68K_FETCHMASK]; \ PC = (u16*)((pc & M68K_ADR_MASK) + BasePC); \ } @@ -321,7 +300,7 @@ typedef signed int s32; { \ u32 pc = A; \ FORCE_ALIGNMENT(pc); \ - BasePC = m68kcontext.Fetch[(pc >> M68K_FETCHSFT) & M68K_FETCHMASK]; \ + BasePC = ctx->Fetch[(pc >> M68K_FETCHSFT) & M68K_FETCHMASK]; \ BasePC -= pc & 0xFF000000; \ PC = (u16*)(pc + BasePC); \ } @@ -336,29 +315,29 @@ typedef signed int s32; // CCnt = io_cycle_counter; #define READ_BYTE_F(A, D) \ - D = m68kcontext.read_byte(A) & 0xFF; + D = ctx->read_byte(A) & 0xFF; #define READ_WORD_F(A, D) \ - D = m68kcontext.read_word(A) & 0xFFFF; + D = ctx->read_word(A) & 0xFFFF; #define READ_LONG_F(A, D) \ - D = m68kcontext.read_long(A); + D = ctx->read_long(A); #define READSX_LONG_F READ_LONG_F #define WRITE_LONG_F(A, D) \ - m68kcontext.write_long(A, D); + ctx->write_long(A, D); #define WRITE_LONG_DEC_F(A, D) \ - m68kcontext.write_word((A) + 2, (D) & 0xFFFF); \ - m68kcontext.write_word((A), (D) >> 16); + ctx->write_word((A) + 2, (D) & 0xFFFF); \ + ctx->write_word((A), (D) >> 16); #define PUSH_32_F(D) \ AREG(7) -= 4; \ - m68kcontext.write_long(AREG(7), D); + ctx->write_long(AREG(7), D); #define POP_32_F(D) \ - D = m68kcontext.read_long(AREG(7)); \ + D = ctx->read_long(AREG(7)); \ AREG(7) += 4; #ifndef FAME_BIG_ENDIAN @@ -430,23 +409,23 @@ typedef signed int s32; #endif 
#define READSX_BYTE_F(A, D) \ - D = (s8)m68kcontext.read_byte(A); + D = (s8)ctx->read_byte(A); #define READSX_WORD_F(A, D) \ - D = (s16)m68kcontext.read_word(A); + D = (s16)ctx->read_word(A); #define WRITE_BYTE_F(A, D) \ - m68kcontext.write_byte(A, D); + ctx->write_byte(A, D); #define WRITE_WORD_F(A, D) \ - m68kcontext.write_word(A, D); + ctx->write_word(A, D); #define PUSH_16_F(D) \ - m68kcontext.write_word(AREG(7) -= 2, D); \ + ctx->write_word(AREG(7) -= 2, D); \ #define POP_16_F(D) \ - D = (u16)m68kcontext.read_word(AREG(7)); \ + D = (u16)ctx->read_word(AREG(7)); \ AREG(7) += 2; #define GET_CCR \ @@ -491,17 +470,17 @@ typedef signed int s32; #endif #define CHECK_INT_TO_JUMP(CLK) \ - if (interrupt_chk__()) \ + if (interrupt_chk__(ctx)) \ { \ - cycles_needed=m68kcontext.io_cycle_counter-(CLK); \ - m68kcontext.io_cycle_counter=(CLK); \ + cycles_needed=ctx->io_cycle_counter-(CLK); \ + ctx->io_cycle_counter=(CLK); \ } #ifdef FAMEC_CHECK_BRANCHES #ifdef FAMEC_NO_GOTOS -#define CHECK_BRANCH_EXCEPTION_GOTO_END m68kcontext.io_cycle_counter=0; return; +#define CHECK_BRANCH_EXCEPTION_GOTO_END ctx->io_cycle_counter=0; return; #else #define CHECK_BRANCH_EXCEPTION_GOTO_END goto famec_Exec_End; #endif @@ -510,8 +489,8 @@ typedef signed int s32; if ((_PC_)&1) \ { \ u32 new_PC, pr_PC=GET_PC; \ - m68kcontext.execinfo |= FM68K_EMULATE_GROUP_0; \ - new_PC = execute_exception_group_0(M68K_ADDRESS_ERROR_EX, 0, pr_PC, 0x12 ); \ + ctx->execinfo |= FM68K_EMULATE_GROUP_0; \ + new_PC = execute_exception_group_0(ctx, M68K_ADDRESS_ERROR_EX, 0, pr_PC, 0x12 ); \ SET_PC(new_PC); \ CHECK_BRANCH_EXCEPTION_GOTO_END \ } @@ -519,38 +498,33 @@ typedef signed int s32; #define CHECK_BRANCH_EXCEPTION(_PC_) #endif +#ifdef FAMEC_NO_GOTOS +#define Opcode ctx->Opcode +#define cycles_needed ctx->cycles_needed +#define PC ctx->PC +#define BasePC ctx->BasePC +#define flag_C ctx->flag_C +#define flag_V ctx->flag_V +#define flag_NotZ ctx->flag_NotZ +#define flag_N ctx->flag_N +#define flag_X ctx->flag_X +#endif 
+ +#define flag_T ctx->flag_T +#define flag_S ctx->flag_S +#define flag_I ctx->flag_I // global variable /////////////////// -/* Current CPU context */ -M68K_CONTEXT *g_m68kcontext; -#define m68kcontext (*g_m68kcontext) - -#ifdef FAMEC_NO_GOTOS -#define Opcode m68kcontext.Opcode -#define cycles_needed m68kcontext.cycles_needed -#define PC m68kcontext.PC -#define BasePC m68kcontext.BasePC -#define flag_C m68kcontext.flag_C -#define flag_V m68kcontext.flag_V -#define flag_NotZ m68kcontext.flag_NotZ -#define flag_N m68kcontext.flag_N -#define flag_X m68kcontext.flag_X -#endif - -#define flag_T m68kcontext.flag_T -#define flag_S m68kcontext.flag_S -#define flag_I m68kcontext.flag_I - static u32 initialised = 0; #ifdef PICODRIVE_HACK -extern M68K_CONTEXT PicoCpuFM68k, PicoCpuFS68k; +extern M68K_CONTEXT PicoCpuFS68k; #endif /* Custom function handler */ -typedef void (*opcode_func)(void); +typedef void (*opcode_func)(M68K_CONTEXT *ctx); static opcode_func JumpTable[0x10000]; @@ -562,13 +536,13 @@ static const s32 exception_cycle_table[256] = 50, // 2: Bus Error 50, // 3: Address Error 34, // 4: Illegal Instruction - 38, // 5: Divide by Zero - 40, // 6: CHK + 34, // 5: Divide by Zero + 34, // 6: CHK 34, // 7: TRAPV 34, // 8: Privilege Violation 34, // 9: Trace - 4, // 10: - 4, // 11: + 34, // 10: Line A + 34, // 11: Line F 4, // 12: RESERVED 4, // 13: Coprocessor Protocol Violation 4, // 14: Format Error @@ -630,6 +604,7 @@ static const s32 exception_cycle_table[256] = 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4 }; +static int init_jump_table(void); /***********************/ /* core main functions */ @@ -646,8 +621,8 @@ void fm68k_init(void) puts("Initializing FAME..."); #endif - if (!initialised) - fm68k_emulate(0, 0); + if (!initialised) + init_jump_table(); #ifdef FAMEC_DEBUG puts("FAME initialized."); @@ -663,33 +638,33 @@ void fm68k_init(void) /* M68K_NO_SUP_ADDR_SPACE (2): No se puede resetear porque no hay mapa */ /* de memoria supervisor de 
extraccion de opcodes */ /******************************************************************************/ -int fm68k_reset(void) +int fm68k_reset(M68K_CONTEXT *ctx) { if (!initialised) - fm68k_emulate(0, 0); + init_jump_table(); // Si la CPU esta en ejecucion, salir con M68K_RUNNING - if (m68kcontext.execinfo & M68K_RUNNING) + if (ctx->execinfo & M68K_RUNNING) return M68K_RUNNING; // Resetear registros - //memset(&m68kcontext.dreg[0], 0, 16*4); + //memset(&ctx->dreg[0], 0, 16*4); // Resetear interrupts, execinfo y ASP - m68kcontext.interrupts[0] = 0; - m68kcontext.execinfo = 0; + ctx->interrupts[0] = 0; + ctx->execinfo = 0; ASP = 0; // Fijar registro de estado - m68kcontext.sr = (m68kcontext.sr & 0xff) | 0x2700; + ctx->sr = (ctx->sr & 0xff) | 0x2700; // Obtener puntero de pila inicial y PC - AREG(7) = m68kcontext.read_long(0); - m68kcontext.pc = m68kcontext.read_long(4); + AREG(7) = ctx->read_long(0); + ctx->pc = ctx->read_long(4); #ifdef FAMEC_DEBUG puts("Reset 68k done!\n"); - printf("PC = 0x%08X\n",m68kcontext.pc); + printf("PC = 0x%08X\n",ctx->pc); #endif return M68K_OK; @@ -701,37 +676,40 @@ int fm68k_reset(void) /* No recibe parametros */ /* Retorna 68k PC */ /****************************************************************************/ -u32 fm68k_get_pc(M68K_CONTEXT *context) +u32 fm68k_get_pc(const M68K_CONTEXT *ctx) { #ifdef FAMEC_NO_GOTOS - return (context->execinfo & M68K_RUNNING)?(uptr)PC-BasePC:context->pc; + return (ctx->execinfo & M68K_RUNNING)?(uptr)PC-BasePC:ctx->pc; #else - return context->pc; // approximate PC in this mode + return ctx->pc; // approximate PC in this mode #endif } ////////////////////////// // Chequea las interrupciones y las inicia -static FAMEC_EXTRA_INLINE s32 interrupt_chk__(void) +static FAMEC_EXTRA_INLINE s32 interrupt_chk__(M68K_CONTEXT *ctx) { - if (m68kcontext.interrupts[0] > flag_I) - return m68kcontext.interrupts[0]; + if (ctx->interrupts[0] > flag_I) + return ctx->interrupts[0]; return 0; } -int 
fm68k_would_interrupt(void) +int fm68k_would_interrupt(M68K_CONTEXT *ctx) { - return interrupt_chk__(); + return interrupt_chk__(ctx); } -static FAMEC_EXTRA_INLINE u32 execute_exception(s32 vect, u32 oldPC, u32 oldSR) +static FAMEC_EXTRA_INLINE u32 execute_exception(M68K_CONTEXT *ctx, s32 vect, u32 oldPC, u32 oldSR) { u32 newPC; //u32 oldSR = GET_SR; - m68kcontext.io_cycle_counter -= exception_cycle_table[vect]; + ctx->io_cycle_counter -= exception_cycle_table[vect]; +#ifdef FAMEC_EMULATE_TRACE + ctx->execinfo &= ~FM68K_EMULATE_TRACE; +#endif PRE_IO @@ -753,6 +731,7 @@ static FAMEC_EXTRA_INLINE u32 execute_exception(s32 vect, u32 oldPC, u32 oldSR) /* adjust SR */ flag_S = M68K_SR_S; + flag_T = 0; #ifndef FAMEC_32BIT_PC newPC&=M68K_ADR_MASK @@ -768,12 +747,12 @@ static FAMEC_EXTRA_INLINE u32 execute_exception(s32 vect, u32 oldPC, u32 oldSR) return newPC; } -static FAMEC_EXTRA_INLINE u32 execute_exception_group_0(s32 vect, s32 addr, u16 spec_info, u32 oldSR) +static FAMEC_EXTRA_INLINE u32 execute_exception_group_0(M68K_CONTEXT *ctx, s32 vect, s32 addr, u16 spec_info, u32 oldSR) { u32 newPC; u16 inst_reg = 0; - newPC = execute_exception(vect, addr, oldSR); - //if (!(m68kcontext.icust_handler && m68kcontext.icust_handler[vect])) + newPC = execute_exception(ctx, vect, addr, oldSR); + //if (!(ctx->icust_handler && ctx->icust_handler[vect])) { PUSH_16_F(inst_reg); PUSH_32_F(addr); @@ -785,7 +764,7 @@ static FAMEC_EXTRA_INLINE u32 execute_exception_group_0(s32 vect, s32 addr, u16 #ifdef FAMEC_NO_GOTOS -#define OPCODE(N_OP) static void OP_##N_OP(void) +#define OPCODE(N_OP) static void OP_##N_OP(M68K_CONTEXT *ctx) #define CAST_OP(N_OP) (opcode_func)&OP_##N_OP #include "famec_opcodes.h" #endif @@ -794,7 +773,7 @@ static FAMEC_EXTRA_INLINE u32 execute_exception_group_0(s32 vect, s32 addr, u16 // main exec function ////////////////////// -int fm68k_emulate(s32 cycles, int idle_mode) +int fm68k_emulate(M68K_CONTEXT *ctx, int cycles, fm68k_call_reason reason) { #ifndef 
FAMEC_NO_GOTOS u32 Opcode; @@ -806,31 +785,37 @@ int fm68k_emulate(s32 cycles, int idle_mode) u32 flag_NotZ; u32 flag_N; u32 flag_X; -#endif - if (!initialised) + switch (reason) { + case fm68k_reason_init: goto init_jump_table; - } - #ifdef PICODRIVE_HACK - if (idle_mode == 1) goto idle_install; - else if (idle_mode == 2) goto idle_remove; + case fm68k_reason_idle_install: + goto idle_install; + case fm68k_reason_idle_remove: + goto idle_remove; #endif + case fm68k_reason_emulate: + break; + } + PC = ctx->PC; + BasePC = ctx->BasePC; +#endif // FAMEC_NO_GOTOS // won't emulate double fault - // if (m68kcontext.execinfo & M68K_FAULTED) return -1; + // if (ctx->execinfo & M68K_FAULTED) return -1; // Cache PPL flag_I = M68K_PPL; - if (m68kcontext.execinfo & FM68K_HALTED) + if (ctx->execinfo & FM68K_HALTED) { - if (interrupt_chk__() <= 0) + if (interrupt_chk__(ctx) <= 0) { return cycles; } - m68kcontext.execinfo &= ~FM68K_HALTED; + ctx->execinfo &= ~FM68K_HALTED; } #ifdef FAMEC_DEBUG @@ -838,13 +823,13 @@ int fm68k_emulate(s32 cycles, int idle_mode) #endif /* Poner la CPU en estado de ejecucion */ - m68kcontext.execinfo |= M68K_RUNNING; + ctx->execinfo |= M68K_RUNNING; // Cache SR - SET_SR(m68kcontext.sr) + SET_SR(ctx->sr) // Fijar PC - SET_PC(m68kcontext.pc) + SET_PC(ctx->pc) #ifdef FAMEC_DEBUG printf("PC: %p\n",PC); @@ -852,33 +837,33 @@ int fm68k_emulate(s32 cycles, int idle_mode) #endif /* guardar ciclos de ejecucion solicitados */ - m68kcontext.io_cycle_counter = cycles; + ctx->io_cycle_counter = cycles; cycles_needed = 0; #ifdef FAMEC_EMULATE_TRACE - if (!(m68kcontext.execinfo & FM68K_EMULATE_TRACE)) + if (!(ctx->execinfo & FM68K_EMULATE_TRACE)) #endif { - s32 line=interrupt_chk__(); + s32 line=interrupt_chk__(ctx); if (line>0) { /* comprobar si hay rutina de acknowledge */ - if (m68kcontext.iack_handler != NULL) - m68kcontext.iack_handler(line); + if (ctx->iack_handler != NULL) + ctx->iack_handler(line); else - m68kcontext.interrupts[0] = 0; + ctx->interrupts[0] 
= 0; - SET_PC(execute_exception(line + 0x18, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, line + 0x18, GET_PC, GET_SR)); flag_I = (u32)line; - if (m68kcontext.io_cycle_counter <= 0) goto famec_End; + if (ctx->io_cycle_counter <= 0) goto famec_End; } #ifdef FAMEC_EMULATE_TRACE else if (flag_T) { - m68kcontext.execinfo |= FM68K_EMULATE_TRACE; - cycles_needed = m68kcontext.io_cycle_counter; - m68kcontext.io_cycle_counter=0; + ctx->execinfo |= FM68K_EMULATE_TRACE; + cycles_needed = ctx->io_cycle_counter; + ctx->io_cycle_counter=0; } #endif } @@ -904,15 +889,14 @@ famec_Exec: #endif #ifdef FAMEC_EMULATE_TRACE - if (m68kcontext.execinfo & FM68K_EMULATE_TRACE) + if (ctx->execinfo & FM68K_EMULATE_TRACE) { - m68kcontext.io_cycle_counter = cycles_needed; + ctx->io_cycle_counter += cycles_needed; cycles_needed = 0; - m68kcontext.execinfo &= ~FM68K_EMULATE_TRACE; - m68kcontext.execinfo |= FM68K_DO_TRACE; - SET_PC(execute_exception(M68K_TRACE_EX, GET_PC, GET_SR)); - flag_T=0; - if (m68kcontext.io_cycle_counter > 0) + ctx->execinfo &= ~FM68K_EMULATE_TRACE; + ctx->execinfo |= FM68K_DO_TRACE; + SET_PC(execute_exception(ctx, M68K_TRACE_EX, GET_PC, GET_SR)); + if (ctx->io_cycle_counter > 0) { //NEXT goto famec_Exec; @@ -923,24 +907,24 @@ famec_Exec: if (cycles_needed != 0) { u32 line; - m68kcontext.io_cycle_counter = cycles_needed; + ctx->io_cycle_counter += cycles_needed; cycles_needed = 0; - if (m68kcontext.io_cycle_counter <= 0) goto famec_End; - line=interrupt_chk__(); + //if (ctx->io_cycle_counter <= 0) goto famec_End; + line=interrupt_chk__(ctx); if (line>0) { - if (m68kcontext.iack_handler != NULL) - m68kcontext.iack_handler(line); + if (ctx->iack_handler != NULL) + ctx->iack_handler(line); else - m68kcontext.interrupts[0] = 0; + ctx->interrupts[0] = 0; - SET_PC(execute_exception(line + 0x18, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, line + 0x18, GET_PC, GET_SR)); flag_I = (u32)line; } #ifdef FAMEC_EMULATE_TRACE if (!(flag_T)) #endif - if 
(m68kcontext.io_cycle_counter > 0) + if (ctx->io_cycle_counter > 0) { //NEXT goto famec_Exec; @@ -948,21 +932,31 @@ famec_Exec: } famec_End: - m68kcontext.sr = GET_SR; - m68kcontext.pc = GET_PC; + ctx->sr = GET_SR; + ctx->pc = GET_PC; +#ifndef FAMEC_NO_GOTOS + ctx->PC = PC; + ctx->BasePC = BasePC; +#endif - m68kcontext.execinfo &= ~M68K_RUNNING; + ctx->execinfo &= ~M68K_RUNNING; #ifdef FAMEC_DEBUG printf("En really end...\n"); printf("PC: %p\n",PC); printf("BasePC: 0x%08x\n",BasePC); - printf("pc: 0x%08x\n",m68kcontext.pc); + printf("pc: 0x%08x\n",ctx->pc); #endif - return cycles - m68kcontext.io_cycle_counter; + return cycles - ctx->io_cycle_counter; +#ifndef FAMEC_NO_GOTOS init_jump_table: +#else +} + +static int init_jump_table(void) +#endif { u32 i, j; @@ -4992,7 +4986,12 @@ init_jump_table: JumpTable[fake_op_base] = JumpTable[fake_op_base|0x0200] = CAST_OP(0x4AFC); \ JumpTable[real_op] = CAST_OP(normal_handler) +#ifndef FAMEC_NO_GOTOS idle_install: +#else +int fm68k_idle_install(void) +#endif +{ // printf("install..\n"); INSTALL_IDLE(0x71fa, 0x66fa, idle_detector_bcc8, 0x6601_idle, 0x6601); INSTALL_IDLE(0x71f8, 0x66f8, idle_detector_bcc8, 0x6601_idle, 0x6601); @@ -5005,8 +5004,14 @@ idle_install: INSTALL_IDLE(0x7dfe, 0x60fe, idle_detector_bcc8, 0x6001_idle, 0x6001); INSTALL_IDLE(0x7dfc, 0x60fc, idle_detector_bcc8, 0x6001_idle, 0x6001); return 0; +} +#ifndef FAMEC_NO_GOTOS idle_remove: +#else +int fm68k_idle_remove(void) +#endif +{ // printf("remove..\n"); UNDO_IDLE(0x71fa, 0x66fa, 0x6601); UNDO_IDLE(0x71f8, 0x66f8, 0x6601); @@ -5019,9 +5024,26 @@ idle_remove: UNDO_IDLE(0x7dfe, 0x60fe, 0x6001); UNDO_IDLE(0x7dfc, 0x60fc, 0x6001); return 0; +} +#endif // PICODRIVE_HACK -#endif +#ifndef FAMEC_NO_GOTOS } -void *get_jumptab(void) { return JumpTable; } +static int init_jump_table(void) +{ + return fm68k_emulate(NULL, 0, fm68k_reason_init); +} +#ifdef PICODRIVE_HACK +int fm68k_idle_install(void) +{ + return fm68k_emulate(NULL, 0, fm68k_reason_idle_install); +} + +int 
fm68k_idle_remove(void) +{ + return fm68k_emulate(NULL, 0, fm68k_reason_idle_remove); +} +#endif +#endif // FAMEC_NO_GOTOS diff --git a/cpu/fame/famec_opcodes.h b/cpu/fame/famec_opcodes.h index c690b45c..7a7832cf 100644 --- a/cpu/fame/famec_opcodes.h +++ b/cpu/fame/famec_opcodes.h @@ -1,6 +1,6 @@ #ifdef PICODRIVE_HACK -#define NOT_POLLING g_m68kcontext->not_polling = 1; +#define NOT_POLLING ctx->not_polling = 1; #else #define NOT_POLLING #endif @@ -644,7 +644,7 @@ OPCODE(0x007C) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); #ifdef USE_CYCLONE_TIMING RET(0) #else @@ -1074,11 +1074,7 @@ OPCODE(0x0280) flag_NotZ = res; flag_N = res >> 24; DREGu32((Opcode >> 0) & 7) = res; -#ifdef USE_CYCLONE_TIMING -RET(14) -#else RET(16) -#endif } // ANDI @@ -1302,8 +1298,12 @@ OPCODE(0x027C) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(20) } @@ -1952,8 +1952,12 @@ OPCODE(0x0A7C) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING RET(0) +#else + RET(4) +#endif } RET(20) } @@ -3904,7 +3908,12 @@ OPCODE(0x0840) flag_NotZ = res & src; res ^= src; DREGu32((Opcode >> 0) & 7) = res; +#ifdef USE_CYCLONE_TIMING + if (src >> 16) ctx->io_cycle_counter -= 2; +RET(10) +#else RET(12) +#endif } // BCHGn @@ -4087,7 +4096,12 @@ OPCODE(0x0880) flag_NotZ = res & src; res &= ~src; DREGu32((Opcode >> 0) & 7) = res; +#ifdef USE_CYCLONE_TIMING + if (src >> 16) ctx->io_cycle_counter -= 2; +RET(12) +#else RET(14) +#endif } // BCLRn @@ -4270,7 +4284,12 @@ OPCODE(0x08C0) flag_NotZ = res & src; res |= src; DREGu32((Opcode >> 0) & 7) = res; +#ifdef USE_CYCLONE_TIMING + if 
(src >> 16) ctx->io_cycle_counter -= 2; +RET(10) +#else RET(12) +#endif } // BSETn @@ -4614,7 +4633,7 @@ OPCODE(0x013C) src = 1 << (src & 7); FETCH_BYTE(res); flag_NotZ = res & src; -RET(8) +RET(10) } // BTST @@ -4663,7 +4682,12 @@ OPCODE(0x0140) flag_NotZ = res & src; res ^= src; DREGu32((Opcode >> 0) & 7) = res; +#ifdef USE_CYCLONE_TIMING + if (src >> 16) ctx->io_cycle_counter -= 2; +RET(6) +#else RET(8) +#endif } // BCHG @@ -4846,7 +4870,12 @@ OPCODE(0x0180) flag_NotZ = res & src; res &= ~src; DREGu32((Opcode >> 0) & 7) = res; +#ifdef USE_CYCLONE_TIMING + if (src >> 16) ctx->io_cycle_counter -= 2; +RET(8) +#else RET(10) +#endif } // BCLR @@ -5029,7 +5058,12 @@ OPCODE(0x01C0) flag_NotZ = res & src; res |= src; DREGu32((Opcode >> 0) & 7) = res; +#ifdef USE_CYCLONE_TIMING + if (src >> 16) ctx->io_cycle_counter -= 2; +RET(6) +#else RET(8) +#endif } // BSET @@ -5484,8 +5518,8 @@ OPCODE(0x1008) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5504,8 +5538,8 @@ OPCODE(0x1088) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5527,8 +5561,8 @@ OPCODE(0x10C8) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5551,8 +5585,8 @@ OPCODE(0x1108) u32 src, dst; // can't read byte from Ax registers ! 
- m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5575,8 +5609,8 @@ OPCODE(0x1148) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5599,8 +5633,8 @@ OPCODE(0x1188) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5623,8 +5657,8 @@ OPCODE(0x11C8) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5646,8 +5680,8 @@ OPCODE(0x13C8) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5669,8 +5703,8 @@ OPCODE(0x1EC8) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -5693,8 +5727,8 @@ OPCODE(0x1F08) u32 src, dst; // can't read byte from Ax registers ! 
- m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; flag_C = 0; @@ -16570,8 +16604,12 @@ OPCODE(0x46C0) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(12) } @@ -16599,8 +16637,12 @@ OPCODE(0x46D0) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(16) } @@ -16629,8 +16671,12 @@ OPCODE(0x46D8) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(16) } @@ -16659,8 +16705,12 @@ OPCODE(0x46E0) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(18) } @@ -16689,8 +16739,12 @@ OPCODE(0x46E8) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(20) } @@ -16719,8 +16773,12 @@ OPCODE(0x46F0) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(22) } @@ -16749,8 +16807,12 @@ OPCODE(0x46F8) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, 
M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(20) } @@ -16778,8 +16840,12 @@ OPCODE(0x46F9) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(24) } @@ -16808,8 +16874,12 @@ OPCODE(0x46FA) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(20) } @@ -16838,8 +16908,12 @@ OPCODE(0x46FB) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(22) } @@ -16864,8 +16938,12 @@ OPCODE(0x46FC) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(16) } @@ -16894,8 +16972,12 @@ OPCODE(0x46DF) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(16) } @@ -16924,8 +17006,12 @@ OPCODE(0x46E7) } else { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING + RET(0) +#else RET(4) +#endif } RET(18) } @@ -16936,18 +17022,20 @@ OPCODE(0x4800) u32 adr, res; u32 src, dst; - res = DREGu8((Opcode >> 0) & 7); - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + dst = DREGu8((Opcode >> 0) & 7); + res = -dst - ((flag_X >> 
M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - DREGu8((Opcode >> 0) & 7) = res; + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + DREGu8((Opcode >> 0) & 7) = res; + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; RET(6) } @@ -16960,18 +17048,20 @@ OPCODE(0x4810) adr = AREG((Opcode >> 0) & 7); PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(12) @@ -16986,18 +17076,20 @@ OPCODE(0x4818) adr = AREG((Opcode >> 0) & 7); AREG((Opcode >> 0) & 7) += 1; PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(12) @@ -17012,18 +17104,20 @@ OPCODE(0x4820) adr = AREG((Opcode >> 0) & 7) - 1; AREG((Opcode >> 0) & 7) = adr; PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = 
-dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(14) @@ -17038,18 +17132,20 @@ OPCODE(0x4828) FETCH_SWORD(adr); adr += AREG((Opcode >> 0) & 7); PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(16) @@ -17064,18 +17160,20 @@ OPCODE(0x4830) adr = AREG((Opcode >> 0) & 7); DECODE_EXT_WORD PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(18) @@ -17089,18 +17187,20 @@ OPCODE(0x4838) FETCH_SWORD(adr); PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 
1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(16) @@ -17114,18 +17214,20 @@ OPCODE(0x4839) FETCH_LONG(adr); PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(20) @@ -17140,18 +17242,20 @@ OPCODE(0x481F) adr = AREG(7); AREG(7) += 2; PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res = (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(12) @@ -17166,18 +17270,20 @@ OPCODE(0x4827) adr = AREG(7) - 2; AREG(7) = adr; PRE_IO - READ_BYTE_F(adr, res) - res = 0x9a - res - ((flag_X >> M68K_SR_X_SFT) & 1); + READ_BYTE_F(adr, dst) + res = -dst - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res != 0x9a) + if (res != 0) { - if ((res & 0x0f) == 0xa) res 
= (res & 0xf0) + 0x10; - res &= 0xFF; - WRITE_BYTE_F(adr, res) + flag_V = res; + if (((res|dst) & 0x0f) == 0) res = (res & 0xf0) + 6; + res = (res + 0x9a) & 0xFF; + WRITE_BYTE_F(adr, res) + flag_V &= ~res; flag_NotZ |= res; flag_X = flag_C = M68K_SR_C; } - else flag_X = flag_C = 0; + else flag_V = flag_X = flag_C = 0; flag_N = res; POST_IO RET(14) @@ -17317,7 +17423,7 @@ OPCODE(0x4890) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(8) #else @@ -17349,7 +17455,7 @@ OPCODE(0x48A0) } while (res >>= 1); AREG((Opcode >> 0) & 7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (dst - adr) * 2; + ctx->io_cycle_counter -= (dst - adr) * 2; RET(8) } @@ -17377,7 +17483,7 @@ OPCODE(0x48A8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -17409,7 +17515,7 @@ OPCODE(0x48B0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(14) #else @@ -17440,7 +17546,7 @@ OPCODE(0x48B8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -17471,7 +17577,7 @@ OPCODE(0x48B9) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -17503,7 +17609,7 @@ OPCODE(0x48A7) } while (res >>= 1); AREG(7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (dst - adr) * 2; + ctx->io_cycle_counter -= (dst - adr) * 2; RET(8) } @@ -17530,7 +17636,7 @@ OPCODE(0x48D0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(8) 
#else @@ -17562,7 +17668,7 @@ OPCODE(0x48E0) } while (res >>= 1); AREG((Opcode >> 0) & 7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (dst - adr) * 2; + ctx->io_cycle_counter -= (dst - adr) * 2; RET(8) } @@ -17590,7 +17696,7 @@ OPCODE(0x48E8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -17622,7 +17728,7 @@ OPCODE(0x48F0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(14) #else @@ -17653,7 +17759,7 @@ OPCODE(0x48F8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -17684,7 +17790,7 @@ OPCODE(0x48F9) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -17716,7 +17822,7 @@ OPCODE(0x48E7) } while (res >>= 1); AREG(7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (dst - adr) * 2; + ctx->io_cycle_counter -= (dst - adr) * 2; RET(8) } @@ -18299,7 +18405,7 @@ OPCODE(0x4AD0) flag_NotZ = res; flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18307,7 +18413,7 @@ OPCODE(0x4AD0) POST_IO #ifdef USE_CYCLONE_TIMING -RET(18) +RET(14) #else RET(8) #endif @@ -18329,7 +18435,7 @@ OPCODE(0x4AD8) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18337,7 +18443,7 @@ OPCODE(0x4AD8) POST_IO #ifdef USE_CYCLONE_TIMING -RET(18) +RET(14) #else RET(8) #endif @@ -18359,7 +18465,7 @@ OPCODE(0x4AE0) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 
0x80; WRITE_BYTE_F(adr, res); } @@ -18367,7 +18473,7 @@ OPCODE(0x4AE0) POST_IO #ifdef USE_CYCLONE_TIMING -RET(20) +RET(16) #else RET(10) #endif @@ -18389,7 +18495,7 @@ OPCODE(0x4AE8) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18397,7 +18503,7 @@ OPCODE(0x4AE8) POST_IO #ifdef USE_CYCLONE_TIMING -RET(22) +RET(18) #else RET(12) #endif @@ -18419,7 +18525,7 @@ OPCODE(0x4AF0) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18427,7 +18533,7 @@ OPCODE(0x4AF0) POST_IO #ifdef USE_CYCLONE_TIMING -RET(24) +RET(20) #else RET(14) #endif @@ -18448,7 +18554,7 @@ OPCODE(0x4AF8) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18456,7 +18562,7 @@ OPCODE(0x4AF8) POST_IO #ifdef USE_CYCLONE_TIMING -RET(22) +RET(18) #else RET(12) #endif @@ -18477,7 +18583,7 @@ OPCODE(0x4AF9) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18485,7 +18591,7 @@ OPCODE(0x4AF9) POST_IO #ifdef USE_CYCLONE_TIMING -RET(26) +RET(22) #else RET(16) #endif @@ -18507,7 +18613,7 @@ OPCODE(0x4ADF) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18515,7 +18621,7 @@ OPCODE(0x4ADF) POST_IO #ifdef USE_CYCLONE_TIMING -RET(18) +RET(14) #else RET(8) #endif @@ -18537,7 +18643,7 @@ OPCODE(0x4AE7) flag_N = res; #ifdef PICODRIVE_HACK - if (g_m68kcontext == &PicoCpuFS68k) { + if (ctx == &PicoCpuFS68k) { res |= 0x80; WRITE_BYTE_F(adr, res); } @@ -18554,22 +18660,26 @@ RET(8) // ILLEGAL OPCODE(0x4AFC) { - SET_PC(execute_exception(M68K_ILLEGAL_INSTRUCTION_EX, GET_PC-2, GET_SR)); +#ifdef PICODRIVE_HACK + extern void 
SekFinishIdleDet(void); + SekFinishIdleDet(); +#endif + SET_PC(execute_exception(ctx, M68K_ILLEGAL_INSTRUCTION_EX, GET_PC-2, GET_SR)); RET(0) } // ILLEGAL A000-AFFF OPCODE(0xA000) { - SET_PC(execute_exception(M68K_1010_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_1010_EX, GET_PC-2, GET_SR)); RET(0) } // ILLEGAL F000-FFFF OPCODE(0xF000) { - SET_PC(execute_exception(M68K_1111_EX, GET_PC-2, GET_SR)); -RET(0) // 4 already taken by exc. handler + SET_PC(execute_exception(ctx, M68K_1111_EX, GET_PC-2, GET_SR)); +RET(0) } // MOVEMaR @@ -18595,7 +18705,7 @@ OPCODE(0x4C90) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -18627,7 +18737,7 @@ OPCODE(0x4C98) } while (res >>= 1); AREG((Opcode >> 0) & 7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; RET(12) } @@ -18655,7 +18765,7 @@ OPCODE(0x4CA8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -18687,7 +18797,7 @@ OPCODE(0x4CB0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(18) #else @@ -18718,7 +18828,7 @@ OPCODE(0x4CB8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -18749,7 +18859,7 @@ OPCODE(0x4CB9) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(20) #else @@ -18781,7 +18891,7 @@ OPCODE(0x4CBA) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef 
USE_CYCLONE_TIMING RET(16) #else @@ -18813,7 +18923,7 @@ OPCODE(0x4CBB) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(18) #else @@ -18845,7 +18955,7 @@ OPCODE(0x4C9F) } while (res >>= 1); AREG(7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; RET(12) } @@ -18872,7 +18982,7 @@ OPCODE(0x4CD0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(12) #else @@ -18904,7 +19014,7 @@ OPCODE(0x4CD8) } while (res >>= 1); AREG((Opcode >> 0) & 7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; RET(12) } @@ -18932,7 +19042,7 @@ OPCODE(0x4CE8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -18964,7 +19074,7 @@ OPCODE(0x4CF0) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(18) #else @@ -18995,7 +19105,7 @@ OPCODE(0x4CF8) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -19026,7 +19136,7 @@ OPCODE(0x4CF9) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(20) #else @@ -19058,7 +19168,7 @@ OPCODE(0x4CFA) psrc++; } while (res >>= 1); POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(16) #else @@ -19090,7 +19200,7 @@ OPCODE(0x4CFB) psrc++; } while (res >>= 1); POST_IO - 
m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; #ifdef USE_CYCLONE_TIMING RET(18) #else @@ -19122,15 +19232,19 @@ OPCODE(0x4CDF) } while (res >>= 1); AREG(7) = adr; POST_IO - m68kcontext.io_cycle_counter -= (adr - dst) * 2; + ctx->io_cycle_counter -= (adr - dst) * 2; RET(12) } // TRAP OPCODE(0x4E40) { - SET_PC(execute_exception(M68K_TRAP_BASE_EX + (Opcode & 0xF), GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_TRAP_BASE_EX + (Opcode & 0xF), GET_PC, GET_SR)); +#ifdef USE_CYCLONE_TIMING +RET(0) +#else RET(4) +#endif } // LINK @@ -19200,8 +19314,12 @@ OPCODE(0x4E60) if (!flag_S) { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING +RET(0) +#else RET(4) +#endif } res = AREGu32((Opcode >> 0) & 7); ASP = res; @@ -19216,8 +19334,12 @@ OPCODE(0x4E68) if (!flag_S) { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING +RET(0) +#else RET(4) +#endif } res = ASP; AREG((Opcode >> 0) & 7) = res; @@ -19232,11 +19354,15 @@ OPCODE(0x4E70) if (!flag_S) { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING +RET(0) +#else RET(4) +#endif } PRE_IO - if (m68kcontext.reset_handler) m68kcontext.reset_handler(); + if (ctx->reset_handler) ctx->reset_handler(); // CPU->Reset_CallBack(); POST_IO RET(132) @@ -19256,8 +19382,12 @@ OPCODE(0x4E72) if (!flag_S) { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING +RET(0) +#else RET(4) +#endif } FETCH_WORD(res); res &= M68K_SR_MASK; @@ -19268,7 +19398,7 @@ OPCODE(0x4E72) AREG(7) 
= ASP; ASP = res; } - m68kcontext.execinfo |= FM68K_HALTED; + ctx->execinfo |= FM68K_HALTED; RET0() } @@ -19280,8 +19410,12 @@ OPCODE(0x4E73) if (!flag_S) { - SET_PC(execute_exception(M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); + SET_PC(execute_exception(ctx, M68K_PRIVILEGE_VIOLATION_EX, GET_PC-2, GET_SR)); +#ifdef USE_CYCLONE_TIMING +RET(0) +#else RET(4) +#endif } PRE_IO POP_16_F(res) @@ -19294,8 +19428,9 @@ OPCODE(0x4E73) AREG(7) = ASP; ASP = res; } + CHECK_BRANCH_EXCEPTION(res) POST_IO - m68kcontext.execinfo &= ~(FM68K_EMULATE_GROUP_0|FM68K_EMULATE_TRACE|FM68K_DO_TRACE); + ctx->execinfo &= ~(FM68K_EMULATE_GROUP_0|FM68K_EMULATE_TRACE|FM68K_DO_TRACE); CHECK_INT_TO_JUMP(20) RET(20) } @@ -19317,8 +19452,12 @@ RET(16) // TRAPV OPCODE(0x4E76) { - if (flag_V & 0x80) - SET_PC(execute_exception(M68K_TRAPV_EX, GET_PC, GET_SR)); + if (flag_V & 0x80) { + SET_PC(execute_exception(ctx, M68K_TRAPV_EX, GET_PC, GET_SR)); +#ifdef USE_CYCLONE_TIMING +RET(0) +#endif + } RET(4) } @@ -19573,15 +19712,15 @@ RET(14) // CHK OPCODE(0x4180) { - u32 adr, res; - u32 src, dst; + s32 src, res; - src = DREGu16((Opcode >> 0) & 7); - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + src = DREGs16((Opcode >> 0) & 7); + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(4) } RET(10) } @@ -19589,17 +19728,18 @@ RET(10) // CHK OPCODE(0x4190) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG((Opcode >> 0) & 7); PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(8) } POST_IO RET(14) @@ -19608,18 +19748,19 
@@ RET(14) // CHK OPCODE(0x4198) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG((Opcode >> 0) & 7); AREG((Opcode >> 0) & 7) += 2; PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(8) } POST_IO RET(14) @@ -19628,18 +19769,19 @@ RET(14) // CHK OPCODE(0x41A0) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG((Opcode >> 0) & 7) - 2; AREG((Opcode >> 0) & 7) = adr; PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(10) } POST_IO RET(16) @@ -19648,18 +19790,19 @@ RET(16) // CHK OPCODE(0x41A8) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; FETCH_SWORD(adr); adr += AREG((Opcode >> 0) & 7); PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(12) } POST_IO RET(18) @@ -19668,18 +19811,19 @@ RET(18) // CHK OPCODE(0x41B0) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG((Opcode >> 0) & 7); DECODE_EXT_WORD PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - 
SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(14) } POST_IO RET(20) @@ -19688,17 +19832,18 @@ RET(20) // CHK OPCODE(0x41B8) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; FETCH_SWORD(adr); PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(12) } POST_IO RET(18) @@ -19707,17 +19852,18 @@ RET(18) // CHK OPCODE(0x41B9) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; FETCH_LONG(adr); PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(16) } POST_IO RET(22) @@ -19726,18 +19872,19 @@ RET(22) // CHK OPCODE(0x41BA) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = GET_SWORD + GET_PC; PC++; PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(12) } POST_IO RET(18) @@ -19746,18 +19893,19 @@ RET(18) // CHK OPCODE(0x41BB) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = GET_PC; DECODE_EXT_WORD PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || 
(res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(14) } POST_IO RET(20) @@ -19766,15 +19914,16 @@ RET(20) // CHK OPCODE(0x41BC) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; - FETCH_WORD(src); - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + FETCH_SWORD(src); + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(8) } POST_IO RET(14) @@ -19783,18 +19932,19 @@ RET(14) // CHK OPCODE(0x419F) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG(7); AREG(7) += 2; PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(8) } POST_IO RET(14) @@ -19803,18 +19953,19 @@ RET(14) // CHK OPCODE(0x41A7) { - u32 adr, res; - u32 src, dst; + s32 src, res; + u32 adr; adr = AREG(7) - 2; AREG(7) = adr; PRE_IO - READ_WORD_F(adr, src) - res = DREGu16((Opcode >> 9) & 7); - if (((s32)res < 0) || (res > src)) + READSX_WORD_F(adr, src) + res = DREGs16((Opcode >> 9) & 7); + if ((res < 0) || (res > src)) { flag_N = res >> 8; - SET_PC(execute_exception(M68K_CHK_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_CHK_EX, GET_PC, GET_SR)); +RET(10) } POST_IO RET(16) @@ -24004,11 +24155,7 @@ OPCODE(0x5048) dst = AREGu32((Opcode >> 0) & 7); res = dst + src; AREG((Opcode >> 0) & 7) = res; -#ifdef USE_CYCLONE_TIMING -RET(4) -#else RET(8) -#endif } // ADDQ @@ -25028,7 +25175,7 @@ OPCODE(0x6201) if (flag_NotZ && (!(flag_C & 0x100))) { PC += ((s8)(Opcode & 0xFE)) >> 1; - 
m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25042,7 +25189,7 @@ OPCODE(0x6301) if ((!flag_NotZ) || (flag_C & 0x100)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25056,7 +25203,7 @@ OPCODE(0x6401) if (!(flag_C & 0x100)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25070,7 +25217,7 @@ OPCODE(0x6501) if (flag_C & 0x100) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25084,7 +25231,7 @@ OPCODE(0x6601) if (flag_NotZ) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25098,7 +25245,7 @@ OPCODE(0x6701) if (!flag_NotZ) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25112,7 +25259,7 @@ OPCODE(0x6801) if (!(flag_V & 0x80)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25126,7 +25273,7 @@ OPCODE(0x6901) if (flag_V & 0x80) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25140,7 +25287,7 @@ OPCODE(0x6A01) if (!(flag_N & 0x80)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25154,7 +25301,7 @@ OPCODE(0x6B01) if (flag_N & 0x80) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25168,7 +25315,7 @@ OPCODE(0x6C01) if (!((flag_N ^ flag_V) & 0x80)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25182,7 +25329,7 @@ OPCODE(0x6D01) if ((flag_N ^ flag_V) & 0x80) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter 
-= 2; } RET(8) } @@ -25196,7 +25343,7 @@ OPCODE(0x6E01) if (flag_NotZ && (!((flag_N ^ flag_V) & 0x80))) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -25210,7 +25357,7 @@ OPCODE(0x6F01) if ((!flag_NotZ) || ((flag_N ^ flag_V) & 0x80)) { PC += ((s8)(Opcode & 0xFE)) >> 1; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } @@ -26934,19 +27081,19 @@ OPCODE(0x8100) { u32 adr, res; u32 src, dst; + u32 corf = 0; src = DREGu8((Opcode >> 0) & 7); dst = DREGu8((Opcode >> 9) & 7); res = (dst & 0xF) - (src & 0xF) - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res -= 6; + if (res > 0xF) corf = 6; res += (dst & 0xF0) - (src & 0xF0); - if (res > 0x99) - { - res += 0xA0; - flag_X = flag_C = M68K_SR_C; - } - else flag_X = flag_C = 0; - flag_NotZ |= res & 0xFF; + flag_V = res; + flag_X = flag_C = (s32)res < (s32)corf ? M68K_SR_C : 0; + if (res > 0xff) res += 0xA0; + res = (res - corf) & 0xFF; + flag_V &= ~res; + flag_NotZ |= res; flag_N = res; DREGu8((Opcode >> 9) & 7) = res; RET(6) @@ -26957,6 +27104,7 @@ OPCODE(0x8108) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG((Opcode >> 0) & 7) - 1; AREG((Opcode >> 0) & 7) = adr; @@ -26966,15 +27114,14 @@ OPCODE(0x8108) AREG((Opcode >> 9) & 7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) - (src & 0xF) - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res -= 6; + if (res > 0xF) corf = 6; res += (dst & 0xF0) - (src & 0xF0); - if (res > 0x99) - { - res += 0xA0; - flag_X = flag_C = M68K_SR_C; - } - else flag_X = flag_C = 0; - flag_NotZ |= res & 0xFF; + flag_V = res; + flag_X = flag_C = (s32)res < (s32)corf ? 
M68K_SR_C : 0; + if (res > 0xff) res += 0xA0; + res = (res - corf) & 0xFF; + flag_V &= ~res; + flag_NotZ |= res; flag_N = res; WRITE_BYTE_F(adr, res) POST_IO @@ -26986,6 +27133,7 @@ OPCODE(0x810F) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG(7) - 2; AREG(7) = adr; @@ -26995,15 +27143,14 @@ OPCODE(0x810F) AREG((Opcode >> 9) & 7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) - (src & 0xF) - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res -= 6; + if (res > 0xF) corf = 6; res += (dst & 0xF0) - (src & 0xF0); - if (res > 0x99) - { - res += 0xA0; - flag_X = flag_C = M68K_SR_C; - } - else flag_X = flag_C = 0; - flag_NotZ |= res & 0xFF; + flag_V = res; + flag_X = flag_C = (s32)res < (s32)corf ? M68K_SR_C : 0; + if (res > 0xff) res += 0xA0; + res = (res - corf) & 0xFF; + flag_V &= ~res; + flag_NotZ |= res; flag_N = res; WRITE_BYTE_F(adr, res) POST_IO @@ -27015,6 +27162,7 @@ OPCODE(0x8F08) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG((Opcode >> 0) & 7) - 1; AREG((Opcode >> 0) & 7) = adr; @@ -27024,15 +27172,14 @@ OPCODE(0x8F08) AREG(7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) - (src & 0xF) - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res -= 6; + if (res > 0xF) corf = 6; res += (dst & 0xF0) - (src & 0xF0); - if (res > 0x99) - { - res += 0xA0; - flag_X = flag_C = M68K_SR_C; - } - else flag_X = flag_C = 0; - flag_NotZ |= res & 0xFF; + flag_V = res; + flag_X = flag_C = (s32)res < (s32)corf ? 
M68K_SR_C : 0; + if (res > 0xff) res += 0xA0; + res = (res - corf) & 0xFF; + flag_V &= ~res; + flag_NotZ |= res; flag_N = res; WRITE_BYTE_F(adr, res) POST_IO @@ -27044,6 +27191,7 @@ OPCODE(0x8F0F) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG(7) - 2; AREG(7) = adr; @@ -27053,15 +27201,14 @@ OPCODE(0x8F0F) AREG(7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) - (src & 0xF) - ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res -= 6; + if (res > 0xF) corf = 6; res += (dst & 0xF0) - (src & 0xF0); - if (res > 0x99) - { - res += 0xA0; - flag_X = flag_C = M68K_SR_C; - } - else flag_X = flag_C = 0; - flag_NotZ |= res & 0xFF; + flag_V = res; + flag_X = flag_C = (s32)res < (s32)corf ? M68K_SR_C : 0; + if (res > 0xff) res += 0xA0; + res = (res - corf) & 0xFF; + flag_V &= ~res; + flag_NotZ |= res; flag_N = res; WRITE_BYTE_F(adr, res) POST_IO @@ -27077,11 +27224,11 @@ OPCODE(0x80C0) src = DREGu16((Opcode >> 0) & 7); if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -RET(140) +RET(4) #else -RET(10) +RET(14) #endif } dst = DREGu32((Opcode >> 9) & 7); @@ -27095,7 +27242,7 @@ RET(10) { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -RET(140) +RET(10) #else RET(70) #endif @@ -27106,12 +27253,12 @@ RET(70) flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -RET(140) +RET(138-BITCOUNT(res,q)*2) #else RET(90) #endif + } } // DIVU @@ -27125,156 +27272,9 @@ OPCODE(0x80D0) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else -RET(14) -#endif - } - dst = DREGu32((Opcode >> 9) & 7); - { - u32 q, r; - - q = dst / src; - r = dst % src; - - if (q & 0xFFFF0000) - { - flag_V = M68K_SR_V; -#ifdef USE_CYCLONE_TIMING_DIV 
-RET(144) -#else - RET(74) -#endif - } - q &= 0x0000FFFF; - flag_NotZ = q; - flag_N = q >> 8; - flag_V = flag_C = 0; - res = q | (r << 16); - DREGu32((Opcode >> 9) & 7) = res; - } -#ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else -RET(94) -#endif -} - -// DIVU -OPCODE(0x80D8) -{ - u32 adr, res; - u32 src, dst; - - adr = AREG((Opcode >> 0) & 7); - AREG((Opcode >> 0) & 7) += 2; - PRE_IO - READ_WORD_F(adr, src) - if (src == 0) - { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); -#ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else -RET(14) -#endif - } - dst = DREGu32((Opcode >> 9) & 7); - { - u32 q, r; - - q = dst / src; - r = dst % src; - - if (q & 0xFFFF0000) - { - flag_V = M68K_SR_V; -#ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else - RET(74) -#endif - } - q &= 0x0000FFFF; - flag_NotZ = q; - flag_N = q >> 8; - flag_V = flag_C = 0; - res = q | (r << 16); - DREGu32((Opcode >> 9) & 7) = res; - } -#ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else -RET(94) -#endif -} - -// DIVU -OPCODE(0x80E0) -{ - u32 adr, res; - u32 src, dst; - - adr = AREG((Opcode >> 0) & 7) - 2; - AREG((Opcode >> 0) & 7) = adr; - PRE_IO - READ_WORD_F(adr, src) - if (src == 0) - { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); -#ifdef USE_CYCLONE_TIMING_DIV -RET(146) -#else -RET(16) -#endif - } - dst = DREGu32((Opcode >> 9) & 7); - { - u32 q, r; - - q = dst / src; - r = dst % src; - - if (q & 0xFFFF0000) - { - flag_V = M68K_SR_V; -#ifdef USE_CYCLONE_TIMING_DIV -RET(146) -#else - RET(76) -#endif - } - q &= 0x0000FFFF; - flag_NotZ = q; - flag_N = q >> 8; - flag_V = flag_C = 0; - res = q | (r << 16); - DREGu32((Opcode >> 9) & 7) = res; - } -#ifdef USE_CYCLONE_TIMING_DIV -RET(146) -#else -RET(96) -#endif -} - -// DIVU -OPCODE(0x80E8) -{ - u32 adr, res; - u32 src, dst; - - FETCH_SWORD(adr); - adr += AREG((Opcode >> 0) & 7); - PRE_IO - READ_WORD_F(adr, src) - if (src == 0) - { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); -#ifdef USE_CYCLONE_TIMING_DIV -RET(148) 
+RET(8) #else RET(18) #endif @@ -27290,9 +27290,9 @@ RET(18) { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -RET(148) +RET(14) #else - RET(78) + RET(74) #endif } q &= 0x0000FFFF; @@ -27301,29 +27301,78 @@ RET(148) flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -RET(148) +RET(142-BITCOUNT(res,q)*2) #else -RET(98) +RET(94) #endif + } } // DIVU -OPCODE(0x80F0) +OPCODE(0x80D8) { u32 adr, res; u32 src, dst; adr = AREG((Opcode >> 0) & 7); - DECODE_EXT_WORD + AREG((Opcode >> 0) & 7) += 2; PRE_IO READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -RET(150) +RET(8) +#else +RET(18) +#endif + } + dst = DREGu32((Opcode >> 9) & 7); + { + u32 q, r; + + q = dst / src; + r = dst % src; + + if (q & 0xFFFF0000) + { + flag_V = M68K_SR_V; +#ifdef USE_CYCLONE_TIMING_DIV +RET(14) +#else + RET(74) +#endif + } + q &= 0x0000FFFF; + flag_NotZ = q; + flag_N = q >> 8; + flag_V = flag_C = 0; + res = q | (r << 16); + DREGu32((Opcode >> 9) & 7) = res; +#ifdef USE_CYCLONE_TIMING_DIV +RET(142-BITCOUNT(res,q)*2) +#else +RET(94) +#endif + } +} + +// DIVU +OPCODE(0x80E0) +{ + u32 adr, res; + u32 src, dst; + + adr = AREG((Opcode >> 0) & 7) - 2; + AREG((Opcode >> 0) & 7) = adr; + PRE_IO + READ_WORD_F(adr, src) + if (src == 0) + { + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); +#ifdef USE_CYCLONE_TIMING_DIV +RET(10) #else RET(20) #endif @@ -27339,9 +27388,9 @@ RET(20) { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -RET(150) +RET(16) #else - RET(80) + RET(76) #endif } q &= 0x0000FFFF; @@ -27350,76 +27399,29 @@ RET(150) flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -RET(150) +RET(144-BITCOUNT(res,q)*2) #else -RET(100) +RET(96) #endif + } } // DIVU -OPCODE(0x80F8) +OPCODE(0x80E8) { u32 adr, res; 
u32 src, dst; FETCH_SWORD(adr); + adr += AREG((Opcode >> 0) & 7); PRE_IO READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -RET(148) -#else -RET(18) -#endif - } - dst = DREGu32((Opcode >> 9) & 7); - { - u32 q, r; - - q = dst / src; - r = dst % src; - - if (q & 0xFFFF0000) - { - flag_V = M68K_SR_V; -#ifdef USE_CYCLONE_TIMING_DIV -RET(148) -#else - RET(78) -#endif - } - q &= 0x0000FFFF; - flag_NotZ = q; - flag_N = q >> 8; - flag_V = flag_C = 0; - res = q | (r << 16); - DREGu32((Opcode >> 9) & 7) = res; - } -#ifdef USE_CYCLONE_TIMING_DIV -RET(148) -#else -RET(98) -#endif -} - -// DIVU -OPCODE(0x80F9) -{ - u32 adr, res; - u32 src, dst; - - FETCH_LONG(adr); - PRE_IO - READ_WORD_F(adr, src) - if (src == 0) - { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); -#ifdef USE_CYCLONE_TIMING_DIV -RET(162) +RET(12) #else RET(22) #endif @@ -27435,7 +27437,152 @@ RET(22) { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -RET(162) +RET(18) +#else + RET(78) +#endif + } + q &= 0x0000FFFF; + flag_NotZ = q; + flag_N = q >> 8; + flag_V = flag_C = 0; + res = q | (r << 16); + DREGu32((Opcode >> 9) & 7) = res; +#ifdef USE_CYCLONE_TIMING_DIV +RET(146-BITCOUNT(res,q)*2) +#else +RET(98) +#endif + } +} + +// DIVU +OPCODE(0x80F0) +{ + u32 adr, res; + u32 src, dst; + + adr = AREG((Opcode >> 0) & 7); + DECODE_EXT_WORD + PRE_IO + READ_WORD_F(adr, src) + if (src == 0) + { + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); +#ifdef USE_CYCLONE_TIMING_DIV +RET(14) +#else +RET(24) +#endif + } + dst = DREGu32((Opcode >> 9) & 7); + { + u32 q, r; + + q = dst / src; + r = dst % src; + + if (q & 0xFFFF0000) + { + flag_V = M68K_SR_V; +#ifdef USE_CYCLONE_TIMING_DIV +RET(20) +#else + RET(80) +#endif + } + q &= 0x0000FFFF; + flag_NotZ = q; + flag_N = q >> 8; + flag_V = flag_C = 0; + res = q | (r << 16); + DREGu32((Opcode 
>> 9) & 7) = res; +#ifdef USE_CYCLONE_TIMING_DIV +RET(148-BITCOUNT(res,q)*2) +#else +RET(100) +#endif + } +} + +// DIVU +OPCODE(0x80F8) +{ + u32 adr, res; + u32 src, dst; + + FETCH_SWORD(adr); + PRE_IO + READ_WORD_F(adr, src) + if (src == 0) + { + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); +#ifdef USE_CYCLONE_TIMING_DIV +RET(12) +#else +RET(22) +#endif + } + dst = DREGu32((Opcode >> 9) & 7); + { + u32 q, r; + + q = dst / src; + r = dst % src; + + if (q & 0xFFFF0000) + { + flag_V = M68K_SR_V; +#ifdef USE_CYCLONE_TIMING_DIV +RET(18) +#else + RET(78) +#endif + } + q &= 0x0000FFFF; + flag_NotZ = q; + flag_N = q >> 8; + flag_V = flag_C = 0; + res = q | (r << 16); + DREGu32((Opcode >> 9) & 7) = res; +#ifdef USE_CYCLONE_TIMING_DIV +RET(146-BITCOUNT(res,q)*2) +#else +RET(98) +#endif + } +} + +// DIVU +OPCODE(0x80F9) +{ + u32 adr, res; + u32 src, dst; + + FETCH_LONG(adr); + PRE_IO + READ_WORD_F(adr, src) + if (src == 0) + { + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); +#ifdef USE_CYCLONE_TIMING_DIV +RET(16) +#else +RET(26) +#endif + } + dst = DREGu32((Opcode >> 9) & 7); + { + u32 q, r; + + q = dst / src; + r = dst % src; + + if (q & 0xFFFF0000) + { + flag_V = M68K_SR_V; +#ifdef USE_CYCLONE_TIMING_DIV +RET(22) #else RET(82) #endif @@ -27446,12 +27593,12 @@ RET(162) flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -RET(162) +RET(150-BITCOUNT(res,q)*2) #else RET(102) #endif + } } // DIVU @@ -27466,9 +27613,104 @@ OPCODE(0x80FA) READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -RET(148) +RET(12) +#else +RET(22) +#endif + } + dst = DREGu32((Opcode >> 9) & 7); + { + u32 q, r; + + q = dst / src; + r = dst % src; + + if (q & 0xFFFF0000) + { + flag_V = M68K_SR_V; +#ifdef USE_CYCLONE_TIMING_DIV +RET(18) +#else + RET(78) 
+#endif + } + q &= 0x0000FFFF; + flag_NotZ = q; + flag_N = q >> 8; + flag_V = flag_C = 0; + res = q | (r << 16); + DREGu32((Opcode >> 9) & 7) = res; +#ifdef USE_CYCLONE_TIMING_DIV +RET(146-BITCOUNT(res,q)*2) +#else +RET(98) +#endif + } +} + +// DIVU +OPCODE(0x80FB) +{ + u32 adr, res; + u32 src, dst; + + adr = GET_PC; + DECODE_EXT_WORD + PRE_IO + READ_WORD_F(adr, src) + if (src == 0) + { + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); +#ifdef USE_CYCLONE_TIMING_DIV +RET(14) +#else +RET(24) +#endif + } + dst = DREGu32((Opcode >> 9) & 7); + { + u32 q, r; + + q = dst / src; + r = dst % src; + + if (q & 0xFFFF0000) + { + flag_V = M68K_SR_V; +#ifdef USE_CYCLONE_TIMING_DIV +RET(20) +#else + RET(80) +#endif + } + q &= 0x0000FFFF; + flag_NotZ = q; + flag_N = q >> 8; + flag_V = flag_C = 0; + res = q | (r << 16); + DREGu32((Opcode >> 9) & 7) = res; +#ifdef USE_CYCLONE_TIMING_DIV +RET(148-BITCOUNT(res,q)*2) +#else +RET(100) +#endif + } +} + +// DIVU +OPCODE(0x80FC) +{ + u32 adr, res; + u32 src, dst; + + FETCH_WORD(src); + if (src == 0) + { + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); +#ifdef USE_CYCLONE_TIMING_DIV +RET(8) #else RET(18) #endif @@ -27484,9 +27726,9 @@ RET(18) { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -RET(148) +RET(14) #else - RET(78) + RET(74) #endif } q &= 0x0000FFFF; @@ -27495,29 +27737,78 @@ RET(148) flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -RET(148) +RET(142-BITCOUNT(res,q)*2) #else -RET(98) +RET(94) #endif + } } // DIVU -OPCODE(0x80FB) +OPCODE(0x80DF) { u32 adr, res; u32 src, dst; - adr = GET_PC; - DECODE_EXT_WORD + adr = AREG(7); + AREG(7) += 2; PRE_IO READ_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -RET(160) +RET(8) +#else +RET(18) +#endif + } + dst = DREGu32((Opcode >> 9) & 
7); + { + u32 q, r; + + q = dst / src; + r = dst % src; + + if (q & 0xFFFF0000) + { + flag_V = M68K_SR_V; +#ifdef USE_CYCLONE_TIMING_DIV +RET(14) +#else + RET(74) +#endif + } + q &= 0x0000FFFF; + flag_NotZ = q; + flag_N = q >> 8; + flag_V = flag_C = 0; + res = q | (r << 16); + DREGu32((Opcode >> 9) & 7) = res; +#ifdef USE_CYCLONE_TIMING_DIV +RET(142-BITCOUNT(res,q)*2) +#else +RET(94) +#endif + } +} + +// DIVU +OPCODE(0x80E7) +{ + u32 adr, res; + u32 src, dst; + + adr = AREG(7) - 2; + AREG(7) = adr; + PRE_IO + READ_WORD_F(adr, src) + if (src == 0) + { + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); +#ifdef USE_CYCLONE_TIMING_DIV +RET(10) #else RET(20) #endif @@ -27533,151 +27824,7 @@ RET(20) { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -RET(160) -#else - RET(80) -#endif - } - q &= 0x0000FFFF; - flag_NotZ = q; - flag_N = q >> 8; - flag_V = flag_C = 0; - res = q | (r << 16); - DREGu32((Opcode >> 9) & 7) = res; - } -#ifdef USE_CYCLONE_TIMING_DIV -RET(160) -#else -RET(100) -#endif -} - -// DIVU -OPCODE(0x80FC) -{ - u32 adr, res; - u32 src, dst; - - FETCH_WORD(src); - if (src == 0) - { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); -#ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else -RET(14) -#endif - } - dst = DREGu32((Opcode >> 9) & 7); - { - u32 q, r; - - q = dst / src; - r = dst % src; - - if (q & 0xFFFF0000) - { - flag_V = M68K_SR_V; -#ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else - RET(74) -#endif - } - q &= 0x0000FFFF; - flag_NotZ = q; - flag_N = q >> 8; - flag_V = flag_C = 0; - res = q | (r << 16); - DREGu32((Opcode >> 9) & 7) = res; - } -#ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else -RET(94) -#endif -} - -// DIVU -OPCODE(0x80DF) -{ - u32 adr, res; - u32 src, dst; - - adr = AREG(7); - AREG(7) += 2; - PRE_IO - READ_WORD_F(adr, src) - if (src == 0) - { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); -#ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else -RET(14) -#endif - } - dst = DREGu32((Opcode >> 9) & 7); 
- { - u32 q, r; - - q = dst / src; - r = dst % src; - - if (q & 0xFFFF0000) - { - flag_V = M68K_SR_V; -#ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else - RET(74) -#endif - } - q &= 0x0000FFFF; - flag_NotZ = q; - flag_N = q >> 8; - flag_V = flag_C = 0; - res = q | (r << 16); - DREGu32((Opcode >> 9) & 7) = res; - } -#ifdef USE_CYCLONE_TIMING_DIV -RET(144) -#else -RET(94) -#endif -} - -// DIVU -OPCODE(0x80E7) -{ - u32 adr, res; - u32 src, dst; - - adr = AREG(7) - 2; - AREG(7) = adr; - PRE_IO - READ_WORD_F(adr, src) - if (src == 0) - { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); -#ifdef USE_CYCLONE_TIMING_DIV -RET(146) -#else RET(16) -#endif - } - dst = DREGu32((Opcode >> 9) & 7); - { - u32 q, r; - - q = dst / src; - r = dst % src; - - if (q & 0xFFFF0000) - { - flag_V = M68K_SR_V; -#ifdef USE_CYCLONE_TIMING_DIV -RET(146) #else RET(76) #endif @@ -27688,12 +27835,12 @@ RET(146) flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -RET(146) +RET(144-BITCOUNT(res,q)*2) #else RET(96) #endif + } } // DIVS @@ -27705,11 +27852,12 @@ OPCODE(0x81C0) src = (s32)DREGs16((Opcode >> 0) & 7); if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81C0; +RET(4) +#else + RET(14) #endif - RET(10) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -27719,9 +27867,10 @@ goto end81C0; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81C0; -#endif +RET(16+(dst>>31)*2) +#else RET(50) +#endif } { s32 q, r; @@ -27733,9 +27882,12 @@ goto end81C0; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81C0; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(16+(dst>>31)*2) +} else RET(152+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(80) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -27743,11 +27895,12 @@ 
goto end81C0; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81C0: m68kcontext.io_cycle_counter -= 50; -#endif +RET(152+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(108) +#endif + } } // DIVS @@ -27761,11 +27914,12 @@ OPCODE(0x81D0) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81D0; +RET(8) +#else + RET(18) #endif - RET(14) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -27775,9 +27929,10 @@ goto end81D0; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81D0; -#endif +RET(20+(dst>>31)*2) +#else RET(54) +#endif } { s32 q, r; @@ -27789,9 +27944,12 @@ goto end81D0; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81D0; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(20+(dst>>31)*2) +} else RET(156+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(84) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -27799,11 +27957,12 @@ goto end81D0; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81D0: m68kcontext.io_cycle_counter -= 50; -#endif +RET(156+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(112) +#endif + } } // DIVS @@ -27818,11 +27977,12 @@ OPCODE(0x81D8) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81D8; +RET(8) +#else + RET(18) #endif - RET(14) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -27832,9 +27992,10 @@ goto end81D8; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81D8; -#endif +RET(20+(dst>>31)*2) +#else RET(54) 
+#endif } { s32 q, r; @@ -27846,9 +28007,12 @@ goto end81D8; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81D8; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(20+(dst>>31)*2) +} else RET(156+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(84) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -27856,11 +28020,12 @@ goto end81D8; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81D8: m68kcontext.io_cycle_counter -= 50; -#endif +RET(156+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(112) +#endif + } } // DIVS @@ -27875,11 +28040,12 @@ OPCODE(0x81E0) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81E0; +RET(10) +#else + RET(20) #endif - RET(16) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -27889,9 +28055,10 @@ goto end81E0; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81E0; -#endif +RET(22+(dst>>31)*2) +#else RET(56) +#endif } { s32 q, r; @@ -27903,9 +28070,12 @@ goto end81E0; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81E0; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(22+(dst>>31)*2) +} else RET(158+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(86) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -27913,11 +28083,12 @@ goto end81E0; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81E0: m68kcontext.io_cycle_counter -= 50; -#endif +RET(158+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(114) +#endif + } } // DIVS @@ -27932,11 +28103,12 @@ OPCODE(0x81E8) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); 
#ifdef USE_CYCLONE_TIMING_DIV -goto end81E8; +RET(12) +#else + RET(22) #endif - RET(18) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -27946,9 +28118,10 @@ goto end81E8; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81E8; -#endif +RET(24+(dst>>31)*2) +#else RET(58) +#endif } { s32 q, r; @@ -27960,9 +28133,12 @@ goto end81E8; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81E8; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(24+(dst>>31)*2) +} else RET(160+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(88) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -27970,11 +28146,12 @@ goto end81E8; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81E8: m68kcontext.io_cycle_counter -= 50; -#endif +RET(160+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(116) +#endif + } } // DIVS @@ -27989,11 +28166,12 @@ OPCODE(0x81F0) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81F0; +RET(14) +#else + RET(24) #endif - RET(20) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -28003,9 +28181,10 @@ goto end81F0; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81F0; -#endif +RET(26+(dst>>31)*2) +#else RET(60) +#endif } { s32 q, r; @@ -28017,9 +28196,12 @@ goto end81F0; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81F0; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(26+(dst>>31)*2) +} else RET(162+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(90) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -28027,11 +28209,12 @@ goto end81F0; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81F0: 
m68kcontext.io_cycle_counter -= 50; -#endif +RET(162+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(118) +#endif + } } // DIVS @@ -28045,11 +28228,12 @@ OPCODE(0x81F8) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81F8; +RET(12) +#else + RET(22) #endif - RET(18) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -28059,9 +28243,10 @@ goto end81F8; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81F8; -#endif +RET(24+(dst>>31)*2) +#else RET(58) +#endif } { s32 q, r; @@ -28073,9 +28258,12 @@ goto end81F8; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81F8; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(24+(dst>>31)*2) +} else RET(160+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(88) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -28083,11 +28271,12 @@ goto end81F8; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81F8: m68kcontext.io_cycle_counter -= 50; -#endif +RET(160+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(116) +#endif + } } // DIVS @@ -28101,11 +28290,12 @@ OPCODE(0x81F9) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81F9; +RET(16) +#else + RET(26) #endif - RET(22) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -28115,9 +28305,10 @@ goto end81F9; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81F9; -#endif +RET(28+(dst>>31)*2) +#else RET(62) +#endif } { s32 q, r; @@ -28129,9 +28320,12 @@ goto end81F9; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81F9; -#endif 
+if (q > 0xFFFF || q < -0x10000) { + RET(28+(dst>>31)*2) +} else RET(164+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(92) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -28139,11 +28333,12 @@ goto end81F9; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81F9: m68kcontext.io_cycle_counter -= 50; -#endif +RET(164+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(120) +#endif + } } // DIVS @@ -28158,11 +28353,12 @@ OPCODE(0x81FA) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81FA; +RET(12) +#else + RET(22) #endif - RET(18) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -28172,9 +28368,10 @@ goto end81FA; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81FA; -#endif +RET(24+(dst>>31)*2) +#else RET(58) +#endif } { s32 q, r; @@ -28186,9 +28383,12 @@ goto end81FA; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81FA; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(24+(dst>>31)*2) +} else RET(160+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(88) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -28196,11 +28396,12 @@ goto end81FA; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81FA: m68kcontext.io_cycle_counter -= 50; -#endif +RET(160+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(116) +#endif + } } // DIVS @@ -28215,11 +28416,12 @@ OPCODE(0x81FB) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81FB; +RET(14) +#else + RET(24) #endif - RET(20) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 
0x80000000) && (src == (u32)-1)) @@ -28229,9 +28431,10 @@ goto end81FB; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81FB; -#endif +RET(26+(dst>>31)*2) +#else RET(60) +#endif } { s32 q, r; @@ -28243,9 +28446,12 @@ goto end81FB; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81FB; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(26+(dst>>31)*2) +} else RET(162+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(90) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -28253,11 +28459,12 @@ goto end81FB; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81FB: m68kcontext.io_cycle_counter -= 50; -#endif +RET(162+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(118) +#endif + } } // DIVS @@ -28269,11 +28476,12 @@ OPCODE(0x81FC) FETCH_SWORD(src); if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81FC; +RET(8) +#else + RET(18) #endif - RET(14) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -28283,9 +28491,10 @@ goto end81FC; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81FC; -#endif +RET(20+(dst>>31)*2) +#else RET(54) +#endif } { s32 q, r; @@ -28297,9 +28506,12 @@ goto end81FC; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81FC; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(20+(dst>>31)*2) +} else RET(156+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(84) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -28307,11 +28519,12 @@ goto end81FC; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81FC: m68kcontext.io_cycle_counter -= 50; -#endif +RET(156+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(112) +#endif + } } // DIVS @@ -28326,11 +28539,12 @@ 
OPCODE(0x81DF) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81DF; +RET(8) +#else + RET(18) #endif - RET(14) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -28340,9 +28554,10 @@ goto end81DF; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81DF; -#endif +RET(20+(dst>>31)*2) +#else RET(54) +#endif } { s32 q, r; @@ -28354,9 +28569,12 @@ goto end81DF; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81DF; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(20+(dst>>31)*2) +} else RET(156+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(84) +#endif } q &= 0x0000FFFF; flag_NotZ = q; @@ -28364,11 +28582,12 @@ goto end81DF; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81DF: m68kcontext.io_cycle_counter -= 50; -#endif +RET(156+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(112) +#endif + } } // DIVS @@ -28383,11 +28602,12 @@ OPCODE(0x81E7) READSX_WORD_F(adr, src) if (src == 0) { - SET_PC(execute_exception(M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); + SET_PC(execute_exception(ctx, M68K_ZERO_DIVIDE_EX, GET_PC, GET_SR)); #ifdef USE_CYCLONE_TIMING_DIV -goto end81E7; +RET(10) +#else + RET(20) #endif - RET(16) } dst = DREGu32((Opcode >> 9) & 7); if ((dst == 0x80000000) && (src == (u32)-1)) @@ -28397,9 +28617,10 @@ goto end81E7; res = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING_DIV -goto end81E7; -#endif +RET(22+(dst>>31)*2) +#else RET(56) +#endif } { s32 q, r; @@ -28411,9 +28632,12 @@ goto end81E7; { flag_V = M68K_SR_V; #ifdef USE_CYCLONE_TIMING_DIV -goto end81E7; -#endif +if (q > 0xFFFF || q < -0x10000) { + RET(22+(dst>>31)*2) +} else RET(158+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(86) +#endif } q &= 0x0000FFFF; 
flag_NotZ = q; @@ -28421,11 +28645,12 @@ goto end81E7; flag_V = flag_C = 0; res = q | (r << 16); DREGu32((Opcode >> 9) & 7) = res; - } #ifdef USE_CYCLONE_TIMING_DIV -end81E7: m68kcontext.io_cycle_counter -= 50; -#endif +RET(158+(dst>>31)*4-(q>>31)*2-BITCOUNT(res,abs(q))*2) +#else RET(114) +#endif + } } // SUBaD @@ -28452,8 +28677,8 @@ OPCODE(0x9008) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; dst = DREGu8((Opcode >> 9) & 7); @@ -30626,8 +30851,8 @@ OPCODE(0xB008) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; dst = DREGu8((Opcode >> 9) & 7); @@ -34121,18 +34346,22 @@ OPCODE(0xC100) { u32 adr, res; u32 src, dst; + u32 corf = 0; src = DREGu8((Opcode >> 0) & 7); dst = DREGu8((Opcode >> 9) & 7); res = (dst & 0xF) + (src & 0xF) + ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res += 6; + if (res > 9) corf = 6; res += (dst & 0xF0) + (src & 0xF0); - if (res > 0x99) + flag_V = ~res; + res += corf; + if (res > 0x9F) { res -= 0xA0; flag_X = flag_C = M68K_SR_C; } else flag_X = flag_C = 0; + flag_V &= res; flag_NotZ |= res & 0xFF; flag_N = res; DREGu8((Opcode >> 9) & 7) = res; @@ -34144,6 +34373,7 @@ OPCODE(0xC108) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG((Opcode >> 0) & 7) - 1; AREG((Opcode >> 0) & 7) = adr; @@ -34153,14 +34383,17 @@ OPCODE(0xC108) AREG((Opcode >> 9) & 7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) + (src & 0xF) + ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res += 6; + if (res > 9) corf = 6; res += (dst & 0xF0) + (src & 0xF0); - if (res > 0x99) + flag_V = ~res; + res += corf; + if (res > 0x9F) { res -= 0xA0; flag_X = flag_C = M68K_SR_C; } else flag_X = flag_C = 0; + flag_V &= res; flag_NotZ |= res & 
0xFF; flag_N = res; WRITE_BYTE_F(adr, res) @@ -34173,6 +34406,7 @@ OPCODE(0xC10F) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG(7) - 2; AREG(7) = adr; @@ -34182,14 +34416,17 @@ OPCODE(0xC10F) AREG((Opcode >> 9) & 7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) + (src & 0xF) + ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res += 6; + if (res > 9) corf = 6; res += (dst & 0xF0) + (src & 0xF0); - if (res > 0x99) + flag_V = ~res; + res += corf; + if (res > 0x9F) { res -= 0xA0; flag_X = flag_C = M68K_SR_C; } else flag_X = flag_C = 0; + flag_V &= res; flag_NotZ |= res & 0xFF; flag_N = res; WRITE_BYTE_F(adr, res) @@ -34202,6 +34439,7 @@ OPCODE(0xCF08) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG((Opcode >> 0) & 7) - 1; AREG((Opcode >> 0) & 7) = adr; @@ -34211,14 +34449,17 @@ OPCODE(0xCF08) AREG(7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) + (src & 0xF) + ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res += 6; + if (res > 9) corf = 6; res += (dst & 0xF0) + (src & 0xF0); - if (res > 0x99) + flag_V = ~res; + res += corf; + if (res > 0x9F) { res -= 0xA0; flag_X = flag_C = M68K_SR_C; } else flag_X = flag_C = 0; + flag_V &= res; flag_NotZ |= res & 0xFF; flag_N = res; WRITE_BYTE_F(adr, res) @@ -34231,6 +34472,7 @@ OPCODE(0xCF0F) { u32 adr, res; u32 src, dst; + u32 corf = 0; adr = AREG(7) - 2; AREG(7) = adr; @@ -34240,14 +34482,17 @@ OPCODE(0xCF0F) AREG(7) = adr; READ_BYTE_F(adr, dst) res = (dst & 0xF) + (src & 0xF) + ((flag_X >> M68K_SR_X_SFT) & 1); - if (res > 9) res += 6; + if (res > 9) corf = 6; res += (dst & 0xF0) + (src & 0xF0); - if (res > 0x99) + flag_V = ~res; + res += corf; + if (res > 0x9F) { res -= 0xA0; flag_X = flag_C = M68K_SR_C; } else flag_X = flag_C = 0; + flag_V &= res; flag_NotZ |= res & 0xFF; flag_N = res; WRITE_BYTE_F(adr, res) @@ -34269,7 +34514,7 @@ OPCODE(0xC0C0) flag_V = flag_C = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING -RET(54) +RET(38+BITCOUNT(res,src)*2) #else RET(50) #endif @@ -34292,7 +34537,7 
@@ OPCODE(0xC0D0) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(58) +RET(42+BITCOUNT(res,src)*2) #else RET(54) #endif @@ -34316,7 +34561,7 @@ OPCODE(0xC0D8) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(58) +RET(42+BITCOUNT(res,src)*2) #else RET(54) #endif @@ -34340,7 +34585,7 @@ OPCODE(0xC0E0) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(60) +RET(44+BITCOUNT(res,src)*2) #else RET(56) #endif @@ -34364,7 +34609,7 @@ OPCODE(0xC0E8) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(62) +RET(46+BITCOUNT(res,src)*2) #else RET(58) #endif @@ -34388,7 +34633,7 @@ OPCODE(0xC0F0) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(64) +RET(48+BITCOUNT(res,src)*2) #else RET(60) #endif @@ -34411,7 +34656,7 @@ OPCODE(0xC0F8) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(62) +RET(46+BITCOUNT(res,src)*2) #else RET(58) #endif @@ -34434,7 +34679,7 @@ OPCODE(0xC0F9) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(66) +RET(50+BITCOUNT(res,src)*2) #else RET(62) #endif @@ -34458,7 +34703,7 @@ OPCODE(0xC0FA) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(62) +RET(46+BITCOUNT(res,src)*2) #else RET(58) #endif @@ -34482,7 +34727,7 @@ OPCODE(0xC0FB) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(64) +RET(48+BITCOUNT(res,src)*2) #else RET(60) #endif @@ -34502,7 +34747,7 @@ OPCODE(0xC0FC) flag_V = flag_C = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING -RET(58) +RET(42+BITCOUNT(res,src)*2) #else RET(54) #endif @@ -34526,7 +34771,7 @@ OPCODE(0xC0DF) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(58) +RET(42+BITCOUNT(res,src)*2) #else RET(54) #endif @@ -34550,7 +34795,7 @@ OPCODE(0xC0E7) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(60) +RET(44+BITCOUNT(res,src)*2) #else RET(56) #endif @@ -34570,7 
+34815,7 @@ OPCODE(0xC1C0) flag_V = flag_C = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING -RET(54) +RET(38+BITCOUNT(res,src^(src<<1))) #else RET(50) #endif @@ -34593,7 +34838,7 @@ OPCODE(0xC1D0) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(58) +RET(42+BITCOUNT(res,src^(src<<1))*2) #else RET(54) #endif @@ -34617,7 +34862,7 @@ OPCODE(0xC1D8) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(58) +RET(42+BITCOUNT(res,src^(src<<1))*2) #else RET(54) #endif @@ -34641,7 +34886,7 @@ OPCODE(0xC1E0) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(60) +RET(44+BITCOUNT(res,src^(src<<1))*2) #else RET(56) #endif @@ -34665,7 +34910,7 @@ OPCODE(0xC1E8) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(62) +RET(46+BITCOUNT(res,src^(src<<1))*2) #else RET(58) #endif @@ -34689,7 +34934,7 @@ OPCODE(0xC1F0) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(64) +RET(48+BITCOUNT(res,src^(src<<1))*2) #else RET(60) #endif @@ -34712,7 +34957,7 @@ OPCODE(0xC1F8) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(62) +RET(46+BITCOUNT(res,src^(src<<1))*2) #else RET(58) #endif @@ -34735,7 +34980,7 @@ OPCODE(0xC1F9) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(66) +RET(50+BITCOUNT(res,src^(src<<1))*2) #else RET(62) #endif @@ -34759,7 +35004,7 @@ OPCODE(0xC1FA) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(62) +RET(46+BITCOUNT(res,src^(src<<1))*2) #else RET(58) #endif @@ -34783,7 +35028,7 @@ OPCODE(0xC1FB) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(64) +RET(48+BITCOUNT(res,src^(src<<1))*2) #else RET(60) #endif @@ -34803,7 +35048,7 @@ OPCODE(0xC1FC) flag_V = flag_C = 0; DREGu32((Opcode >> 9) & 7) = res; #ifdef USE_CYCLONE_TIMING -RET(58) +RET(42+BITCOUNT(res,src^(src<<1))*2) #else RET(54) #endif @@ -34827,7 +35072,7 @@ OPCODE(0xC1DF) DREGu32((Opcode >> 9) & 
7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(58) +RET(42+BITCOUNT(res,src^(src<<1))*2) #else RET(54) #endif @@ -34851,7 +35096,7 @@ OPCODE(0xC1E7) DREGu32((Opcode >> 9) & 7) = res; POST_IO #ifdef USE_CYCLONE_TIMING -RET(60) +RET(44+BITCOUNT(res,src^(src<<1))*2) #else RET(56) #endif @@ -34923,8 +35168,8 @@ OPCODE(0xD008) u32 src, dst; // can't read byte from Ax registers ! - m68kcontext.execinfo |= M68K_FAULTED; - m68kcontext.io_cycle_counter = 0; + ctx->execinfo |= M68K_FAULTED; + ctx->io_cycle_counter = 0; /* goto famec_Exec_End; dst = DREGu8((Opcode >> 9) & 7); @@ -37083,7 +37328,7 @@ OPCODE(0xE000) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = (s32)DREGs8((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37103,7 +37348,7 @@ OPCODE(0xE040) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = (s32)DREGs16((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37123,7 +37368,7 @@ OPCODE(0xE080) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = (s32)DREGs32((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37143,7 +37388,7 @@ OPCODE(0xE008) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); flag_N = flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37162,7 +37407,7 @@ OPCODE(0xE048) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); flag_N = flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37181,7 +37426,7 @@ OPCODE(0xE088) u32 sft; sft = (((Opcode >> 9) - 1) 
& 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_N = flag_V = 0; flag_X = flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37200,7 +37445,7 @@ OPCODE(0xE010) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); src |= (flag_X & M68K_SR_X) << 0; res = (src >> sft) | (src << (9 - sft)); @@ -37221,7 +37466,7 @@ OPCODE(0xE050) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); src |= (flag_X & M68K_SR_X) << 8; res = (src >> sft) | (src << (17 - sft)); @@ -37242,7 +37487,7 @@ OPCODE(0xE090) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_C = src << ((M68K_SR_C_SFT + 1) - sft); if (sft == 1) res = (src >> 1) | ((flag_X & M68K_SR_X) << (32 - (M68K_SR_X_SFT + 1))); @@ -37264,7 +37509,7 @@ OPCODE(0xE018) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); flag_V = 0; flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37284,7 +37529,7 @@ OPCODE(0xE058) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); flag_V = 0; flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37304,7 +37549,7 @@ OPCODE(0xE098) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_V = 0; flag_C = src << ((M68K_SR_C_SFT + 1) - sft); @@ -37324,7 +37569,7 @@ OPCODE(0xE100) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 
2; src = DREGu8((Opcode >> 0) & 7); if (sft < 8) { @@ -37363,7 +37608,7 @@ OPCODE(0xE140) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); flag_X = flag_C = src >> (8 - sft); res = src << sft; @@ -37388,7 +37633,7 @@ OPCODE(0xE180) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_X = flag_C = src >> (24 - sft); res = src << sft; @@ -37413,7 +37658,7 @@ OPCODE(0xE108) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src << (0 + sft); @@ -37433,7 +37678,7 @@ OPCODE(0xE148) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src >> (8 - sft); @@ -37453,7 +37698,7 @@ OPCODE(0xE188) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_V = 0; flag_X = flag_C = src >> (24 - sft); @@ -37473,7 +37718,7 @@ OPCODE(0xE110) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); src |= (flag_X & M68K_SR_X) << 0; res = (src << sft) | (src >> (9 - sft)); @@ -37494,7 +37739,7 @@ OPCODE(0xE150) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); src |= (flag_X & M68K_SR_X) << 8; res = (src << sft) | (src >> (17 - sft)); @@ -37515,7 +37760,7 @@ OPCODE(0xE190) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft 
* 2; src = DREGu32((Opcode >> 0) & 7); flag_C = src >> ((32 - M68K_SR_C_SFT) - sft); if (sft == 1) res = (src << 1) | ((flag_X & M68K_SR_X) >> ((M68K_SR_X_SFT + 1) - 1)); @@ -37537,7 +37782,7 @@ OPCODE(0xE118) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu8((Opcode >> 0) & 7); flag_V = 0; flag_C = src << (0 + sft); @@ -37557,7 +37802,7 @@ OPCODE(0xE158) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu16((Opcode >> 0) & 7); flag_V = 0; flag_C = src >> (8 - sft); @@ -37577,7 +37822,7 @@ OPCODE(0xE198) u32 sft; sft = (((Opcode >> 9) - 1) & 7) + 1; - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; src = DREGu32((Opcode >> 0) & 7); flag_V = 0; flag_C = src >> (24 - sft); @@ -37600,7 +37845,7 @@ OPCODE(0xE020) src = (s32)DREGs8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 8) { flag_V = 0; @@ -37653,7 +37898,7 @@ OPCODE(0xE060) src = (s32)DREGs16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 16) { flag_V = 0; @@ -37711,7 +37956,7 @@ OPCODE(0xE0A0) src = (s32)DREGs32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 32) { flag_V = 0; @@ -37765,7 +38010,7 @@ OPCODE(0xE028) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft <= 8) { flag_N = flag_V = 0; @@ -37804,7 +38049,7 @@ OPCODE(0xE068) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft <= 16) { flag_N = flag_V = 0; @@ -37848,7 +38093,7 @@ OPCODE(0xE0A8) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + 
ctx->io_cycle_counter -= sft * 2; if (sft < 32) { flag_N = flag_V = 0; @@ -37890,7 +38135,7 @@ OPCODE(0xE030) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 9; src |= (flag_X & M68K_SR_X) << 0; @@ -37922,7 +38167,7 @@ OPCODE(0xE070) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 17; src |= (flag_X & M68K_SR_X) << 8; @@ -37959,14 +38204,14 @@ OPCODE(0xE0B0) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 33; if (sft != 0) { if (sft == 1) res = (src >> 1) | ((flag_X & M68K_SR_X) << (32 - (M68K_SR_X_SFT + 1))); else res = (src >> sft) | (src << (33 - sft)) | (((flag_X & M68K_SR_X) << (32 - (M68K_SR_X_SFT + 1))) >> (sft - 1)); - flag_X = (src >> (32 - sft)) << M68K_SR_X_SFT; + flag_X = (src >> (sft - 1)) << M68K_SR_X_SFT; } else res = src; flag_C = flag_X; @@ -37997,7 +38242,7 @@ OPCODE(0xE038) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft &= 0x07; flag_C = src << (M68K_SR_C_SFT - ((sft - 1) & 7)); @@ -38028,7 +38273,7 @@ OPCODE(0xE078) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft &= 0x0F; flag_C = (src >> ((sft - 1) & 15)) << M68K_SR_C_SFT; @@ -38064,7 +38309,7 @@ OPCODE(0xE0B8) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft &= 0x1F; flag_C = (src >> ((sft - 1) & 31)) << M68K_SR_C_SFT; @@ -38096,7 +38341,7 @@ OPCODE(0xE120) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 8) { flag_X = flag_C = (src << sft) >> 0; @@ -38144,7 +38389,7 @@ OPCODE(0xE160) src = DREGu16((Opcode >> 0) & 7); if (sft) { - 
m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 16) { flag_X = flag_C = (src << sft) >> 8; @@ -38197,7 +38442,7 @@ OPCODE(0xE1A0) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 32) { flag_X = flag_C = (src >> (32 - sft)) << M68K_SR_C_SFT; @@ -38246,7 +38491,7 @@ OPCODE(0xE128) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft <= 8) { flag_X = flag_C = (src << sft) >> 0; @@ -38286,7 +38531,7 @@ OPCODE(0xE168) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft <= 16) { flag_X = flag_C = (src << sft) >> 8; @@ -38331,7 +38576,7 @@ OPCODE(0xE1A8) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft < 32) { flag_X = flag_C = (src >> (32 - sft)) << M68K_SR_C_SFT; @@ -38374,7 +38619,7 @@ OPCODE(0xE130) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 9; src |= (flag_X & M68K_SR_X) << 0; @@ -38406,7 +38651,7 @@ OPCODE(0xE170) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 17; src |= (flag_X & M68K_SR_X) << 8; @@ -38443,7 +38688,7 @@ OPCODE(0xE1B0) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; sft %= 33; if (sft != 0) @@ -38481,7 +38726,7 @@ OPCODE(0xE138) src = DREGu8((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft &= 0x07) { flag_C = (src << sft) >> 0; @@ -38519,7 +38764,7 @@ OPCODE(0xE178) src = DREGu16((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= 
sft * 2; if (sft &= 0x0F) { flag_C = (src << sft) >> 8; @@ -38562,7 +38807,7 @@ OPCODE(0xE1B8) src = DREGu32((Opcode >> 0) & 7); if (sft) { - m68kcontext.io_cycle_counter -= sft * 2; + ctx->io_cycle_counter -= sft * 2; if (sft &= 0x1F) { flag_C = (src >> (32 - sft)) << M68K_SR_C_SFT; @@ -40069,7 +40314,7 @@ OPCODE(idle_detector_bcc8) if ( Opcode & 0x0100) newop |= 0x400; // beq if (!(Opcode & 0x0f00)) newop |= 0xc00; // bra - ret = SekRegisterIdlePatch(GET_PC - 2, Opcode, newop, &m68kcontext); + ret = SekRegisterIdlePatch(GET_PC - 2, Opcode, newop, ctx); switch (ret) { case 0: PC[-1] = newop; break; @@ -40085,7 +40330,7 @@ end: if (cond_true) { PC = dest_pc; - m68kcontext.io_cycle_counter -= 2; + ctx->io_cycle_counter -= 2; } RET(8) } diff --git a/cpu/musashi/m68k_in.c b/cpu/musashi/m68k_in.c index 5c9cc066..04fedf87 100644 --- a/cpu/musashi/m68k_in.c +++ b/cpu/musashi/m68k_in.c @@ -257,11 +257,62 @@ void m68ki_build_opcode_table(void) XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX M68KMAKE_OPCODE_HANDLER_HEADER +#include #include "m68kcpu.h" extern void m68040_fpu_op0(void); extern void m68040_fpu_op1(void); +/* Count non-0 bits */ +INLINE int m68ki_bit_count(uint32 arg) +{ + arg = arg - ((arg>>1)&0x55555555); + arg = (arg&0x33333333) + ((arg>>2)&0x33333333); + return (((arg + (arg>>4))&0x0f0f0f0f) * 0x01010101) >> 24; +} + +INLINE int m68ki_mulu_cycles(uint32 arg) +{ + if (CPU_TYPE_IS_000(CPU_TYPE)) + return m68ki_bit_count(arg) * 2; + else if (CPU_TYPE_IS_010(CPU_TYPE)) + return m68ki_bit_count(arg); /* guesswork */ + else + return m68ki_bit_count(arg) / 2; /* guesswork */ +} + +INLINE int m68ki_muls_cycles(sint32 arg) +{ + if (CPU_TYPE_IS_000(CPU_TYPE)) + return m68ki_bit_count(arg ^ (arg<<1)) * 2; + else if (CPU_TYPE_IS_010(CPU_TYPE)) + return m68ki_bit_count(arg ^ (arg<<1)); /* guesswork */ + else + return m68ki_bit_count(arg ^ (arg<<1)) / 2; /* guesswork */ +} + +INLINE int m68ki_divu_cycles(uint32 arg) +{ + /* approximation 
only. Doesn't factor in shorter cycles by carry */ + if (CPU_TYPE_IS_000(CPU_TYPE)) + return 128 - m68ki_bit_count(arg) * 2; + else if (CPU_TYPE_IS_010(CPU_TYPE)) + return 96; /* guesswork */ + else + return 32; /* guesswork */ +} + +INLINE int m68ki_divs_cycles(uint32 scyc, sint32 arg) +{ + /* approximation only. Doesn't factor in shorter cycles by carry */ + if (CPU_TYPE_IS_000(CPU_TYPE)) + return 128 - m68ki_bit_count(abs(arg)) * 2 + scyc*2 + 8; + else if (CPU_TYPE_IS_010(CPU_TYPE)) + return 96 + scyc*2 + 8; /* guesswork */ + else + return 32 + scyc + 4; /* guesswork */ +} + /* ======================================================================== */ /* ========================= INSTRUCTION HANDLERS ========================= */ /* ======================================================================== */ @@ -389,7 +440,7 @@ addi 32 . . 0000011010...... A+-DXWL... U U U U 20 20 4 4 addq 8 . d 0101...000000... .......... U U U U 4 4 2 2 addq 8 . . 0101...000...... A+-DXWL... U U U U 8 8 4 4 addq 16 . d 0101...001000... .......... U U U U 4 4 2 2 -addq 16 . a 0101...001001... .......... U U U U 4 4 2 2 +addq 16 . a 0101...001001... .......... U U U U 8 8 2 2 addq 16 . . 0101...001...... A+-DXWL... U U U U 8 8 4 4 addq 32 . d 0101...010000... .......... U U U U 8 8 2 2 addq 32 . a 0101...010001... .......... U U U U 8 8 2 2 @@ -418,7 +469,7 @@ andi 8 . d 0000001000000... .......... U U U U 8 8 2 2 andi 8 . . 0000001000...... A+-DXWL... U U U U 12 12 4 4 andi 16 . d 0000001001000... .......... U U U U 8 8 2 2 andi 16 . . 0000001001...... A+-DXWL... U U U U 12 12 4 4 -andi 32 . d 0000001010000... .......... U U U U 14 14 2 2 +andi 32 . d 0000001010000... .......... U U U U 16 14 2 2 andi 32 . . 0000001010...... A+-DXWL... U U U U 20 20 4 4 asr 8 s . 1110...000000... .......... U U U U 6 6 6 6 asr 16 s . 1110...001000... .......... U U U U 6 6 6 6 @@ -438,13 +489,13 @@ bcc 8 . . 0110............ .......... U U U U 10 10 6 6 bcc 16 . . 0110....00000000 .......... 
U U U U 10 10 6 6 bcc 32 . . 0110....11111111 .......... U U U U 10 10 6 6 bchg 8 r . 0000...101...... A+-DXWL... U U U U 8 8 4 4 -bchg 32 r d 0000...101000... .......... U U U U 8 8 4 4 +bchg 32 r d 0000...101000... .......... U U U U 6 6 4 4 bchg 8 s . 0000100001...... A+-DXWL... U U U U 12 12 4 4 -bchg 32 s d 0000100001000... .......... U U U U 12 12 4 4 -bclr 8 r . 0000...110...... A+-DXWL... U U U U 8 10 4 4 -bclr 32 r d 0000...110000... .......... U U U U 10 10 4 4 +bchg 32 s d 0000100001000... .......... U U U U 10 10 4 4 +bclr 8 r . 0000...110...... A+-DXWL... U U U U 8 8 4 4 +bclr 32 r d 0000...110000... .......... U U U U 8 8 4 4 bclr 8 s . 0000100010...... A+-DXWL... U U U U 12 12 4 4 -bclr 32 s d 0000100010000... .......... U U U U 14 14 4 4 +bclr 32 s d 0000100010000... .......... U U U U 12 12 4 4 bfchg 32 . d 1110101011000... .......... . . U U . . 12 12 timing not quite correct bfchg 32 . . 1110101011...... A..DXWL... . . U U . . 20 20 bfclr 32 . d 1110110011000... .......... . . U U . . 12 12 @@ -465,10 +516,10 @@ bkpt 0 . . 0100100001001... .......... . U U U . 10 10 10 bra 8 . . 01100000........ .......... U U U U 10 10 10 10 bra 16 . . 0110000000000000 .......... U U U U 10 10 10 10 bra 32 . . 0110000011111111 .......... U U U U 10 10 10 10 -bset 32 r d 0000...111000... .......... U U U U 8 8 4 4 +bset 32 r d 0000...111000... .......... U U U U 6 6 4 4 bset 8 r . 0000...111...... A+-DXWL... U U U U 8 8 4 4 bset 8 s . 0000100011...... A+-DXWL... U U U U 12 12 4 4 -bset 32 s d 0000100011000... .......... U U U U 12 12 4 4 +bset 32 s d 0000100011000... .......... U U U U 10 10 4 4 bsr 8 . . 01100001........ .......... U U U U 18 18 7 7 bsr 16 . . 0110000100000000 .......... U U U U 18 18 7 7 bsr 32 . . 0110000111111111 .......... U U U U 18 18 7 7 @@ -482,8 +533,8 @@ cas 16 . . 0000110011...... A+-DXWL... . . U U . . 12 12 cas 32 . . 0000111011...... A+-DXWL... . . U U . . 12 12 cas2 16 . . 0000110011111100 .......... . . U U . . 12 12 cas2 32 . . 
0000111011111100 .......... . . U U . . 12 12 -chk 16 . d 0100...110000... .......... U U U U 10 8 8 8 -chk 16 . . 0100...110...... A+-DXWLdxI U U U U 10 8 8 8 +chk 16 . d 0100...110000... .......... U U U U 4 2 2 2 +chk 16 . . 0100...110...... A+-DXWLdxI U U U U 4 2 2 2 chk 32 . d 0100...100000... .......... . . U U . . 8 8 chk 32 . . 0100...100...... A+-DXWLdxI . . U U . . 8 8 chk2cmp2 8 . pcdi 0000000011111010 .......... . . U U . . 23 23 @@ -541,10 +592,10 @@ cptrapcc 32 . . 1111...001111... .......... . . U . . . 4 . dbt 16 . . 0101000011001... .......... U U U U 12 12 6 6 dbf 16 . . 0101000111001... .......... U U U U 12 12 6 6 dbcc 16 . . 0101....11001... .......... U U U U 12 12 6 6 -divs 16 . d 1000...111000... .......... U U U U 158 122 56 56 -divs 16 . . 1000...111...... A+-DXWLdxI U U U U 158 122 56 56 -divu 16 . d 1000...011000... .......... U U U U 140 108 44 44 -divu 16 . . 1000...011...... A+-DXWLdxI U U U U 140 108 44 44 +divs 16 . d 1000...111000... .......... U U U U 16 16 16 16 cycles depending on operands +divs 16 . . 1000...111...... A+-DXWLdxI U U U U 16 16 16 16 cycles depending on operands +divu 16 . d 1000...011000... .......... U U U U 10 10 10 10 cycles depending on operands +divu 16 . . 1000...011...... A+-DXWLdxI U U U U 10 10 10 10 cycles depending on operands divl 32 . d 0100110001000... .......... . . U U . . 84 84 divl 32 . . 0100110001...... A+-DXWLdxI . . U U . . 84 84 eor 8 . d 1011...100000... .......... U U U U 4 4 2 2 @@ -696,10 +747,10 @@ moves 8 . . 0000111000...... A+-DXWL... . S S S . 14 5 5 moves 16 . . 0000111001...... A+-DXWL... . S S S . 14 5 5 moves 32 . . 0000111010...... A+-DXWL... . S S S . 16 5 5 move16 32 . . 1111011000100... .......... . . . U . . . 4 TODO: correct timing -muls 16 . d 1100...111000... .......... U U U U 54 32 27 27 -muls 16 . . 1100...111...... A+-DXWLdxI U U U U 54 32 27 27 -mulu 16 . d 1100...011000... .......... U U U U 54 30 27 27 -mulu 16 . . 1100...011...... 
A+-DXWLdxI U U U U 54 30 27 27 +muls 16 . d 1100...111000... .......... U U U U 38 28 20 20 cycles depending on operands +muls 16 . . 1100...111...... A+-DXWLdxI U U U U 38 28 20 20 cycles depending on operands +mulu 16 . d 1100...011000... .......... U U U U 38 26 20 20 cycles depending on operands +mulu 16 . . 1100...011...... A+-DXWLdxI U U U U 38 26 20 20 cycles depending on operands mull 32 . d 0100110000000... .......... . . U U . . 43 43 mull 32 . . 0100110000...... A+-DXWLdxI . . U U . . 43 43 nbcd 8 . d 0100100000000... .......... U U U U 6 6 6 6 @@ -835,7 +886,7 @@ subx 16 mm . 1001...101001... .......... U U U U 18 18 12 12 subx 32 mm . 1001...110001... .......... U U U U 30 30 12 12 swap 32 . . 0100100001000... .......... U U U U 4 4 4 4 tas 8 . d 0100101011000... .......... U U U U 4 4 4 4 -tas 8 . . 0100101011...... A+-DXWL... U U U U 14 14 12 12 +tas 8 . . 0100101011...... A+-DXWL... U U U U 10 10 8 8 trap 0 . . 010011100100.... .......... U U U U 4 4 4 4 trapt 0 . . 0101000011111100 .......... . . U U . . 4 4 trapt 16 . . 0101000011111010 .......... . . U U . . 6 6 @@ -918,13 +969,15 @@ M68KMAKE_OP(abcd, 8, rr, .) 
uint src = DY; uint dst = *r_dst; uint res = LOW_NIBBLE(src) + LOW_NIBBLE(dst) + XFLAG_AS_1(); - - FLAG_V = ~res; /* Undefined V behavior */ + uint corf = 0; if(res > 9) - res += 6; + corf = 6; res += HIGH_NIBBLE(src) + HIGH_NIBBLE(dst); - FLAG_X = FLAG_C = (res > 0x99) << 8; + FLAG_V = ~res; /* Undefined V behavior */ + + res += corf; + FLAG_X = FLAG_C = (res > 0x9f) << 8; if(FLAG_C) res -= 0xa0; @@ -944,13 +997,15 @@ M68KMAKE_OP(abcd, 8, mm, ax7) uint ea = EA_A7_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(src) + LOW_NIBBLE(dst) + XFLAG_AS_1(); - - FLAG_V = ~res; /* Undefined V behavior */ + uint corf = 0; if(res > 9) - res += 6; + corf = 6; res += HIGH_NIBBLE(src) + HIGH_NIBBLE(dst); - FLAG_X = FLAG_C = (res > 0x99) << 8; + FLAG_V = ~res; /* Undefined V behavior */ + + res += corf; + FLAG_X = FLAG_C = (res > 0x9f) << 8; if(FLAG_C) res -= 0xa0; @@ -970,13 +1025,15 @@ M68KMAKE_OP(abcd, 8, mm, ay7) uint ea = EA_AX_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(src) + LOW_NIBBLE(dst) + XFLAG_AS_1(); - - FLAG_V = ~res; /* Undefined V behavior */ + uint corf = 0; if(res > 9) - res += 6; + corf = 6; res += HIGH_NIBBLE(src) + HIGH_NIBBLE(dst); - FLAG_X = FLAG_C = (res > 0x99) << 8; + FLAG_V = ~res; /* Undefined V behavior */ + + res += corf; + FLAG_X = FLAG_C = (res > 0x9f) << 8; if(FLAG_C) res -= 0xa0; @@ -996,13 +1053,15 @@ M68KMAKE_OP(abcd, 8, mm, axy7) uint ea = EA_A7_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(src) + LOW_NIBBLE(dst) + XFLAG_AS_1(); - - FLAG_V = ~res; /* Undefined V behavior */ + uint corf = 0; if(res > 9) - res += 6; + corf = 6; res += HIGH_NIBBLE(src) + HIGH_NIBBLE(dst); - FLAG_X = FLAG_C = (res > 0x99) << 8; + FLAG_V = ~res; /* Undefined V behavior */ + + res += corf; + FLAG_X = FLAG_C = (res > 0x9f) << 8; if(FLAG_C) res -= 0xa0; @@ -1022,13 +1081,15 @@ M68KMAKE_OP(abcd, 8, mm, .) 
uint ea = EA_AX_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(src) + LOW_NIBBLE(dst) + XFLAG_AS_1(); - - FLAG_V = ~res; /* Undefined V behavior */ + uint corf = 0; if(res > 9) - res += 6; + corf = 6; res += HIGH_NIBBLE(src) + HIGH_NIBBLE(dst); - FLAG_X = FLAG_C = (res > 0x99) << 8; + FLAG_V = ~res; /* Undefined V behavior */ + + res += corf; + FLAG_X = FLAG_C = (res > 0x9f) << 8; if(FLAG_C) res -= 0xa0; @@ -2322,6 +2383,8 @@ M68KMAKE_OP(bchg, 32, r, d) uint* r_dst = &DY; uint mask = 1 << (DX & 0x1f); + if(CPU_TYPE_IS_010_LESS(CPU_TYPE) && mask >= 0x10000) + USE_CYCLES(2); FLAG_Z = *r_dst & mask; *r_dst ^= mask; } @@ -2343,6 +2406,8 @@ M68KMAKE_OP(bchg, 32, s, d) uint* r_dst = &DY; uint mask = 1 << (OPER_I_8() & 0x1f); + if(CPU_TYPE_IS_010_LESS(CPU_TYPE) && mask >= 0x10000) + USE_CYCLES(2); FLAG_Z = *r_dst & mask; *r_dst ^= mask; } @@ -2364,6 +2429,8 @@ M68KMAKE_OP(bclr, 32, r, d) uint* r_dst = &DY; uint mask = 1 << (DX & 0x1f); + if(CPU_TYPE_IS_010_LESS(CPU_TYPE) && mask >= 0x10000) + USE_CYCLES(2); FLAG_Z = *r_dst & mask; *r_dst &= ~mask; } @@ -2385,6 +2452,8 @@ M68KMAKE_OP(bclr, 32, s, d) uint* r_dst = &DY; uint mask = 1 << (OPER_I_8() & 0x1f); + if(CPU_TYPE_IS_010_LESS(CPU_TYPE) && mask >= 0x10000) + USE_CYCLES(2); FLAG_Z = *r_dst & mask; *r_dst &= ~mask; } @@ -3172,6 +3241,8 @@ M68KMAKE_OP(bset, 32, r, d) uint* r_dst = &DY; uint mask = 1 << (DX & 0x1f); + if(CPU_TYPE_IS_010_LESS(CPU_TYPE) && mask >= 0x10000) + USE_CYCLES(2); FLAG_Z = *r_dst & mask; *r_dst |= mask; } @@ -3193,6 +3264,8 @@ M68KMAKE_OP(bset, 32, s, d) uint* r_dst = &DY; uint mask = 1 << (OPER_I_8() & 0x1f); + if(CPU_TYPE_IS_010_LESS(CPU_TYPE) && mask >= 0x10000) + USE_CYCLES(2); FLAG_Z = *r_dst & mask; *r_dst |= mask; } @@ -3480,6 +3553,7 @@ M68KMAKE_OP(chk, 16, ., d) if(src >= 0 && src <= bound) { + USE_CYCLES(6); return; } FLAG_N = (src < 0)<<7; @@ -3498,6 +3572,7 @@ M68KMAKE_OP(chk, 16, ., .) 
if(src >= 0 && src <= bound) { + USE_CYCLES(6); return; } FLAG_N = (src < 0)<<7; @@ -4477,8 +4552,10 @@ M68KMAKE_OP(divs, 16, ., d) { uint* r_dst = &DX; sint src = MAKE_INT_16(DY); + sint dst = MAKE_INT_32(*r_dst); sint quotient; sint remainder; + int cycles; if(src != 0) { @@ -4489,12 +4566,21 @@ M68KMAKE_OP(divs, 16, ., d) FLAG_V = VFLAG_CLEAR; FLAG_C = CFLAG_CLEAR; *r_dst = 0; + USE_CYCLES(m68ki_divs_cycles(2, 0)); return; } - quotient = MAKE_INT_32(*r_dst) / src; - remainder = MAKE_INT_32(*r_dst) % src; + if(abs(dst) >= abs(src<<16)) + { + FLAG_V = VFLAG_SET; + USE_CYCLES(2*(dst < 0)); + return; + } + quotient = dst / src; + remainder = dst % src; + + cycles = m68ki_divs_cycles(2*(dst < 0) + (quotient < 0), quotient); if(quotient == MAKE_INT_16(quotient)) { FLAG_Z = quotient; @@ -4502,12 +4588,15 @@ M68KMAKE_OP(divs, 16, ., d) FLAG_V = VFLAG_CLEAR; FLAG_C = CFLAG_CLEAR; *r_dst = MASK_OUT_ABOVE_32(MASK_OUT_ABOVE_16(quotient) | (remainder << 16)); + USE_CYCLES(cycles); return; } FLAG_V = VFLAG_SET; + USE_CYCLES(cycles); return; } m68ki_exception_trap(EXCEPTION_ZERO_DIVIDE); + ADD_CYCLES(12); } @@ -4515,8 +4604,10 @@ M68KMAKE_OP(divs, 16, ., .) { uint* r_dst = &DX; sint src = MAKE_INT_16(M68KMAKE_GET_OPER_AY_16); + sint dst = MAKE_INT_32(*r_dst); sint quotient; sint remainder; + int cycles; if(src != 0) { @@ -4527,12 +4618,21 @@ M68KMAKE_OP(divs, 16, ., .) FLAG_V = VFLAG_CLEAR; FLAG_C = CFLAG_CLEAR; *r_dst = 0; + USE_CYCLES(m68ki_divs_cycles(2, 0)); return; } - quotient = MAKE_INT_32(*r_dst) / src; - remainder = MAKE_INT_32(*r_dst) % src; + if(abs(dst) >= abs(src<<16)) + { + FLAG_V = VFLAG_SET; + USE_CYCLES(2*(dst < 0)); + return; + } + quotient = dst / src; + remainder = dst % src; + + cycles = m68ki_divs_cycles(2*(dst < 0) + (quotient < 0), quotient); if(quotient == MAKE_INT_16(quotient)) { FLAG_Z = quotient; @@ -4540,12 +4640,15 @@ M68KMAKE_OP(divs, 16, ., .) 
FLAG_V = VFLAG_CLEAR; FLAG_C = CFLAG_CLEAR; *r_dst = MASK_OUT_ABOVE_32(MASK_OUT_ABOVE_16(quotient) | (remainder << 16)); + USE_CYCLES(cycles); return; } FLAG_V = VFLAG_SET; + USE_CYCLES(cycles); return; } m68ki_exception_trap(EXCEPTION_ZERO_DIVIDE); + ADD_CYCLES(12); } @@ -4566,12 +4669,14 @@ M68KMAKE_OP(divu, 16, ., d) FLAG_V = VFLAG_CLEAR; FLAG_C = CFLAG_CLEAR; *r_dst = MASK_OUT_ABOVE_32(MASK_OUT_ABOVE_16(quotient) | (remainder << 16)); + USE_CYCLES(m68ki_divu_cycles(quotient)); return; } FLAG_V = VFLAG_SET; return; } m68ki_exception_trap(EXCEPTION_ZERO_DIVIDE); + ADD_CYCLES(6); } @@ -4592,12 +4697,14 @@ M68KMAKE_OP(divu, 16, ., .) FLAG_V = VFLAG_CLEAR; FLAG_C = CFLAG_CLEAR; *r_dst = MASK_OUT_ABOVE_32(MASK_OUT_ABOVE_16(quotient) | (remainder << 16)); + USE_CYCLES(m68ki_divu_cycles(quotient)); return; } FLAG_V = VFLAG_SET; return; } m68ki_exception_trap(EXCEPTION_ZERO_DIVIDE); + ADD_CYCLES(6); } @@ -7490,9 +7597,11 @@ M68KMAKE_OP(move16, 32, ., .) M68KMAKE_OP(muls, 16, ., d) { uint* r_dst = &DX; - uint res = MASK_OUT_ABOVE_32(MAKE_INT_16(DY) * MAKE_INT_16(MASK_OUT_ABOVE_16(*r_dst))); + uint x = MAKE_INT_16(DY); + uint res = MASK_OUT_ABOVE_32(x * MAKE_INT_16(MASK_OUT_ABOVE_16(*r_dst))); *r_dst = res; + USE_CYCLES(m68ki_muls_cycles(x)); FLAG_Z = res; FLAG_N = NFLAG_32(res); @@ -7504,9 +7613,11 @@ M68KMAKE_OP(muls, 16, ., d) M68KMAKE_OP(muls, 16, ., .) { uint* r_dst = &DX; - uint res = MASK_OUT_ABOVE_32(MAKE_INT_16(M68KMAKE_GET_OPER_AY_16) * MAKE_INT_16(MASK_OUT_ABOVE_16(*r_dst))); + uint x = MAKE_INT_16(M68KMAKE_GET_OPER_AY_16); + uint res = MASK_OUT_ABOVE_32(x * MAKE_INT_16(MASK_OUT_ABOVE_16(*r_dst))); *r_dst = res; + USE_CYCLES(m68ki_muls_cycles(x)); FLAG_Z = res; FLAG_N = NFLAG_32(res); @@ -7518,9 +7629,11 @@ M68KMAKE_OP(muls, 16, ., .) 
M68KMAKE_OP(mulu, 16, ., d) { uint* r_dst = &DX; - uint res = MASK_OUT_ABOVE_16(DY) * MASK_OUT_ABOVE_16(*r_dst); + uint x = MASK_OUT_ABOVE_16(DY); + uint res = x * MASK_OUT_ABOVE_16(*r_dst); *r_dst = res; + USE_CYCLES(m68ki_mulu_cycles(x)); FLAG_Z = res; FLAG_N = NFLAG_32(res); @@ -7532,9 +7645,11 @@ M68KMAKE_OP(mulu, 16, ., d) M68KMAKE_OP(mulu, 16, ., .) { uint* r_dst = &DX; - uint res = M68KMAKE_GET_OPER_AY_16 * MASK_OUT_ABOVE_16(*r_dst); + uint x = M68KMAKE_GET_OPER_AY_16; + uint res = x * MASK_OUT_ABOVE_16(*r_dst); *r_dst = res; + USE_CYCLES(m68ki_mulu_cycles(x)); FLAG_Z = res; FLAG_N = NFLAG_32(res); @@ -7794,19 +7909,19 @@ M68KMAKE_OP(mull, 32, ., .) M68KMAKE_OP(nbcd, 8, ., d) { uint* r_dst = &DY; - uint dst = *r_dst; - uint res = MASK_OUT_ABOVE_8(0x9a - dst - XFLAG_AS_1()); + uint dst = MASK_OUT_ABOVE_8(*r_dst); + uint res = -dst - XFLAG_AS_1(); - if(res != 0x9a) + if(res != 0) { - FLAG_V = ~res; /* Undefined V behavior */ + FLAG_V = res; /* Undefined V behavior */ - if((res & 0x0f) == 0xa) - res = (res & 0xf0) + 0x10; + if(((res|dst) & 0x0f) == 0) + res = (res & 0xf0) + 6; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res + 0x9a); - FLAG_V &= res; /* Undefined V behavior part II */ + FLAG_V &= ~res; /* Undefined V behavior part II */ *r_dst = MASK_OUT_BELOW_8(*r_dst) | res; @@ -7828,18 +7943,18 @@ M68KMAKE_OP(nbcd, 8, ., .) 
{ uint ea = M68KMAKE_GET_EA_AY_8; uint dst = m68ki_read_8(ea); - uint res = MASK_OUT_ABOVE_8(0x9a - dst - XFLAG_AS_1()); + uint res = -dst - XFLAG_AS_1(); - if(res != 0x9a) + if(res != 0) { - FLAG_V = ~res; /* Undefined V behavior */ + FLAG_V = res; /* Undefined V behavior */ - if((res & 0x0f) == 0xa) - res = (res & 0xf0) + 0x10; + if(((res|dst) & 0x0f) == 0) + res = (res & 0xf0) + 6; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res + 0x9a); - FLAG_V &= res; /* Undefined V behavior part II */ + FLAG_V &= ~res; /* Undefined V behavior part II */ m68ki_write_8(ea, MASK_OUT_ABOVE_8(res)); @@ -9359,26 +9474,26 @@ M68KMAKE_OP(sbcd, 8, rr, .) uint src = DY; uint dst = *r_dst; uint res = LOW_NIBBLE(dst) - LOW_NIBBLE(src) - XFLAG_AS_1(); + uint corf = 0; -// FLAG_V = ~res; /* Undefined V behavior */ - FLAG_V = VFLAG_CLEAR; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to assume cleared. */ - - if(res > 9) - res -= 6; + if(res > 0xf) + corf = 6; res += HIGH_NIBBLE(dst) - HIGH_NIBBLE(src); - if(res > 0x99) + FLAG_V = res; /* Undefined V behavior */ + if(res > 0xff) { res += 0xa0; FLAG_X = FLAG_C = CFLAG_SET; - FLAG_N = NFLAG_SET; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to follow carry. 
*/ } + else if(res < corf) + FLAG_X = FLAG_C = CFLAG_SET; else - FLAG_N = FLAG_X = FLAG_C = 0; + FLAG_X = FLAG_C = 0; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res - corf); -// FLAG_V &= res; /* Undefined V behavior part II */ -// FLAG_N = NFLAG_8(res); /* Undefined N behavior */ + FLAG_V &= ~res; /* Undefined V behavior part II */ + FLAG_N = NFLAG_8(res); /* Undefined N behavior */ FLAG_Z |= res; *r_dst = MASK_OUT_BELOW_8(*r_dst) | res; @@ -9391,26 +9506,26 @@ M68KMAKE_OP(sbcd, 8, mm, ax7) uint ea = EA_A7_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(dst) - LOW_NIBBLE(src) - XFLAG_AS_1(); + uint corf = 0; -// FLAG_V = ~res; /* Undefined V behavior */ - FLAG_V = VFLAG_CLEAR; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to return zero. */ - - if(res > 9) - res -= 6; + if(res > 0xf) + corf = 6; res += HIGH_NIBBLE(dst) - HIGH_NIBBLE(src); - if(res > 0x99) + FLAG_V = res; /* Undefined V behavior */ + if(res > 0xff) { res += 0xa0; FLAG_X = FLAG_C = CFLAG_SET; - FLAG_N = NFLAG_SET; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to follow carry. */ } + else if(res < corf) + FLAG_X = FLAG_C = CFLAG_SET; else - FLAG_N = FLAG_X = FLAG_C = 0; + FLAG_X = FLAG_C = 0; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res - corf); -// FLAG_V &= res; /* Undefined V behavior part II */ -// FLAG_N = NFLAG_8(res); /* Undefined N behavior */ + FLAG_V &= ~res; /* Undefined V behavior part II */ + FLAG_N = NFLAG_8(res); /* Undefined N behavior */ FLAG_Z |= res; m68ki_write_8(ea, res); @@ -9423,26 +9538,26 @@ M68KMAKE_OP(sbcd, 8, mm, ay7) uint ea = EA_AX_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(dst) - LOW_NIBBLE(src) - XFLAG_AS_1(); + uint corf = 0; -// FLAG_V = ~res; /* Undefined V behavior */ - FLAG_V = VFLAG_CLEAR; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to return zero. 
*/ - - if(res > 9) - res -= 6; + if(res > 0xf) + corf = 6; res += HIGH_NIBBLE(dst) - HIGH_NIBBLE(src); - if(res > 0x99) + FLAG_V = res; /* Undefined V behavior */ + if(res > 0xff) { res += 0xa0; FLAG_X = FLAG_C = CFLAG_SET; - FLAG_N = NFLAG_SET; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to follow carry. */ } + else if(res < corf) + FLAG_X = FLAG_C = CFLAG_SET; else - FLAG_N = FLAG_X = FLAG_C = 0; + FLAG_X = FLAG_C = 0; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res - corf); -// FLAG_V &= res; /* Undefined V behavior part II */ -// FLAG_N = NFLAG_8(res); /* Undefined N behavior */ + FLAG_V &= ~res; /* Undefined V behavior part II */ + FLAG_N = NFLAG_8(res); /* Undefined N behavior */ FLAG_Z |= res; m68ki_write_8(ea, res); @@ -9455,26 +9570,26 @@ M68KMAKE_OP(sbcd, 8, mm, axy7) uint ea = EA_A7_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(dst) - LOW_NIBBLE(src) - XFLAG_AS_1(); + uint corf = 0; -// FLAG_V = ~res; /* Undefined V behavior */ - FLAG_V = VFLAG_CLEAR; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to return zero. */ - - if(res > 9) - res -= 6; + if(res > 0xf) + corf = 6; res += HIGH_NIBBLE(dst) - HIGH_NIBBLE(src); - if(res > 0x99) + FLAG_V = res; /* Undefined V behavior */ + if(res > 0xff) { res += 0xa0; FLAG_X = FLAG_C = CFLAG_SET; - FLAG_N = NFLAG_SET; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to follow carry. */ } + else if(res < corf) + FLAG_X = FLAG_C = CFLAG_SET; else - FLAG_N = FLAG_X = FLAG_C = 0; + FLAG_X = FLAG_C = 0; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res - corf); -// FLAG_V &= res; /* Undefined V behavior part II */ -// FLAG_N = NFLAG_8(res); /* Undefined N behavior */ + FLAG_V &= ~res; /* Undefined V behavior part II */ + FLAG_N = NFLAG_8(res); /* Undefined N behavior */ FLAG_Z |= res; m68ki_write_8(ea, res); @@ -9487,26 +9602,26 @@ M68KMAKE_OP(sbcd, 8, mm, .) 
uint ea = EA_AX_PD_8(); uint dst = m68ki_read_8(ea); uint res = LOW_NIBBLE(dst) - LOW_NIBBLE(src) - XFLAG_AS_1(); + uint corf = 0; -// FLAG_V = ~res; /* Undefined V behavior */ - FLAG_V = VFLAG_CLEAR; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to return zero. */ - - if(res > 9) - res -= 6; + if(res > 0xf) + corf = 6; res += HIGH_NIBBLE(dst) - HIGH_NIBBLE(src); - if(res > 0x99) + FLAG_V = res; /* Undefined V behavior */ + if(res > 0xff) { res += 0xa0; FLAG_X = FLAG_C = CFLAG_SET; - FLAG_N = NFLAG_SET; /* Undefined in Motorola's M68000PM/AD rev.1 and safer to follow carry. */ } + else if(res < corf) + FLAG_X = FLAG_C = CFLAG_SET; else - FLAG_N = FLAG_X = FLAG_C = 0; + FLAG_X = FLAG_C = 0; - res = MASK_OUT_ABOVE_8(res); + res = MASK_OUT_ABOVE_8(res - corf); -// FLAG_V &= res; /* Undefined V behavior part II */ -// FLAG_N = NFLAG_8(res); /* Undefined N behavior */ + FLAG_V &= ~res; /* Undefined V behavior part II */ + FLAG_N = NFLAG_8(res); /* Undefined N behavior */ FLAG_Z |= res; m68ki_write_8(ea, res); diff --git a/cpu/musashi/m68kcpu.c b/cpu/musashi/m68kcpu.c index 72bb217f..1ac1a5d9 100644 --- a/cpu/musashi/m68kcpu.c +++ b/cpu/musashi/m68kcpu.c @@ -43,7 +43,7 @@ extern void m68040_fpu_op1(void); /* ================================= DATA ================================= */ /* ======================================================================== */ -int m68ki_initial_cycles; +//int m68ki_initial_cycles; //int m68ki_remaining_cycles = 0; /* Number of clocks remaining */ uint m68ki_tracing = 0; uint m68ki_address_space; @@ -133,13 +133,13 @@ uint8 m68ki_exception_cycle_table[4][256] = 50, /* 2: Bus Error (unemulated) */ 50, /* 3: Address Error (unemulated) */ 34, /* 4: Illegal Instruction */ - 38, /* 5: Divide by Zero -- ASG: changed from 42 */ - 40, /* 6: CHK -- ASG: chanaged from 44 */ + 34, /* 5: Divide by Zero -- ASG: changed from 42 */ + 34, /* 6: CHK -- ASG: chanaged from 44 */ 34, /* 7: TRAPV */ 34, /* 8: Privilege Violation */ 34, /* 9: Trace 
*/ - 4, /* 10: 1010 */ - 4, /* 11: 1111 */ + 34, /* 10: 1010 */ + 34, /* 11: 1111 */ 4, /* 12: RESERVED */ 4, /* 13: Coprocessor Protocol Violation (unemulated) */ 4, /* 14: Format Error */ @@ -206,13 +206,13 @@ uint8 m68ki_exception_cycle_table[4][256] = 126, /* 2: Bus Error (unemulated) */ 126, /* 3: Address Error (unemulated) */ 38, /* 4: Illegal Instruction */ - 44, /* 5: Divide by Zero */ - 44, /* 6: CHK */ + 38, /* 5: Divide by Zero */ + 38, /* 6: CHK */ 34, /* 7: TRAPV */ 38, /* 8: Privilege Violation */ 38, /* 9: Trace */ - 4, /* 10: 1010 */ - 4, /* 11: 1111 */ + 38, /* 10: 1010 */ + 38, /* 11: 1111 */ 4, /* 12: RESERVED */ 4, /* 13: Coprocessor Protocol Violation (unemulated) */ 4, /* 14: Format Error */ @@ -279,8 +279,8 @@ uint8 m68ki_exception_cycle_table[4][256] = 50, /* 2: Bus Error (unemulated) */ 50, /* 3: Address Error (unemulated) */ 20, /* 4: Illegal Instruction */ - 38, /* 5: Divide by Zero */ - 40, /* 6: CHK */ + 34, /* 5: Divide by Zero */ + 34, /* 6: CHK */ 20, /* 7: TRAPV */ 34, /* 8: Privilege Violation */ 25, /* 9: Trace */ @@ -352,8 +352,8 @@ uint8 m68ki_exception_cycle_table[4][256] = 50, /* 2: Bus Error (unemulated) */ 50, /* 3: Address Error (unemulated) */ 20, /* 4: Illegal Instruction */ - 38, /* 5: Divide by Zero */ - 40, /* 6: CHK */ + 34, /* 5: Divide by Zero */ + 34, /* 6: CHK */ 20, /* 7: TRAPV */ 34, /* 8: Privilege Violation */ 25, /* 9: Trace */ @@ -787,12 +787,12 @@ void m68k_set_cpu_type(unsigned int cpu_type) /* ASG: removed per-instruction interrupt checks */ int m68k_execute(int num_cycles) { + // notaz + m68ki_check_interrupts(); + /* Make sure we're not stopped */ if(!CPU_STOPPED) { - // notaz - m68ki_check_interrupts(); - /* Set our pool of clock cycles available */ SET_CYCLES(num_cycles); m68ki_initial_cycles = num_cycles; @@ -808,7 +808,7 @@ int m68k_execute(int num_cycles) // notaz m68ki_trace_t1(); - while(GET_CYCLES() >= 0) + while(GET_CYCLES() > 0) // do { /* Set tracing accodring to T1. 
(T0 is done inside instruction) */ @@ -828,6 +828,10 @@ int m68k_execute(int num_cycles) m68ki_instruction_jump_table[REG_IR](); USE_CYCLES(CYC_INSTRUCTION[REG_IR]); // moving this up may cause a deadlock + /* ASG: update cycles */ + USE_CYCLES(CPU_INT_CYCLES); + CPU_INT_CYCLES = 0; + /* Trace m68k_exception, if necessary */ m68ki_exception_if_trace(); /* auto-disable (see m68kcpu.h) */ @@ -839,10 +843,6 @@ int m68k_execute(int num_cycles) /* set previous PC to current PC for the next entry into the loop */ REG_PPC = REG_PC; - /* ASG: update cycles */ - USE_CYCLES(CPU_INT_CYCLES); - CPU_INT_CYCLES = 0; - /* return how many clocks we used */ return m68ki_initial_cycles - GET_CYCLES(); } diff --git a/cpu/musashi/m68kcpu.h b/cpu/musashi/m68kcpu.h index 83e92c5f..3b40765f 100644 --- a/cpu/musashi/m68kcpu.h +++ b/cpu/musashi/m68kcpu.h @@ -27,14 +27,11 @@ #define M68KCPU__HEADER // notaz: something's missing this -#ifndef UINT64 -#define UINT64 unsigned long long -#endif -#ifndef UINT16 -#define UINT32 unsigned int -#define UINT16 unsigned short -#define UINT8 unsigned char -#endif +#include +typedef uint64_t UINT64; +typedef uint32_t UINT32; +typedef uint16_t UINT16; +typedef uint8_t UINT8; #include "m68k.h" #include @@ -74,8 +71,12 @@ #define uint32 unsigned int /* AWJ: changed from long to int */ /* signed and unsigned int must be at least 32 bits wide */ -#define sint signed int -#define uint unsigned int +//#define sint signed int +//#define uint unsigned int +#define sint _sint +#define uint _uint +typedef signed int sint; +typedef unsigned int uint; #if M68K_USE_64_BIT @@ -142,6 +143,7 @@ /* Exception Vectors handled by emulation */ #define EXCEPTION_BUS_ERROR 2 /* This one is not emulated! 
*/ #define EXCEPTION_ADDRESS_ERROR 3 /* This one is partially emulated (doesn't stack a proper frame yet) */ +#undef EXCEPTION_ILLEGAL_INSTRUCTION #define EXCEPTION_ILLEGAL_INSTRUCTION 4 #define EXCEPTION_ZERO_DIVIDE 5 #define EXCEPTION_CHK 6 @@ -918,6 +920,7 @@ typedef struct void (*instr_hook_callback)(void); /* Called every instruction cycle prior to execution */ // notaz + sint cyc_initial_cycles; sint cyc_remaining_cycles; sint not_polling; } m68ki_cpu_core; @@ -925,6 +928,7 @@ typedef struct // notaz extern m68ki_cpu_core *m68ki_cpu_p; #define m68ki_cpu (*m68ki_cpu_p) +#define m68ki_initial_cycles m68ki_cpu_p->cyc_initial_cycles #define m68ki_remaining_cycles m68ki_cpu_p->cyc_remaining_cycles @@ -1537,7 +1541,7 @@ INLINE void m68ki_set_sr_noint_nosp(uint value) INLINE void m68ki_set_sr(uint value) { m68ki_set_sr_noint(value); - if (GET_CYCLES() >= 0) // notaz + if (GET_CYCLES() > 0) // notaz m68ki_check_interrupts(); } @@ -1623,7 +1627,7 @@ INLINE void m68ki_stack_frame_buserr(uint sr) /* Format 8 stack frame (68010). * 68010 only. This is the 29 word bus/address error frame. */ -void m68ki_stack_frame_1000(uint pc, uint sr, uint vector) +INLINE void m68ki_stack_frame_1000(uint pc, uint sr, uint vector) { /* VERSION * NUMBER @@ -1677,7 +1681,7 @@ void m68ki_stack_frame_1000(uint pc, uint sr, uint vector) * if the error happens at an instruction boundary. * PC stacked is address of next instruction. */ -void m68ki_stack_frame_1010(uint sr, uint vector, uint pc) +INLINE void m68ki_stack_frame_1010(uint sr, uint vector, uint pc) { /* INTERNAL REGISTER */ m68ki_push_16(0); @@ -1724,7 +1728,7 @@ void m68ki_stack_frame_1010(uint sr, uint vector, uint pc) * if the error happens during instruction execution. * PC stacked is address of instruction in progress. 
*/ -void m68ki_stack_frame_1011(uint sr, uint vector, uint pc) +INLINE void m68ki_stack_frame_1011(uint sr, uint vector, uint pc) { /* INTERNAL REGISTERS (18 words) */ m68ki_push_32(0); @@ -1968,7 +1972,7 @@ m68k_read_memory_8(0x00ffff01); /* Service an interrupt request and start exception processing */ -void m68ki_exception_interrupt(uint int_level) +INLINE void m68ki_exception_interrupt(uint int_level) { uint vector; uint sr; diff --git a/cpu/musashi/m68kmake.c b/cpu/musashi/m68kmake.c index f3157340..6dd57fca 100644 --- a/cpu/musashi/m68kmake.c +++ b/cpu/musashi/m68kmake.c @@ -122,6 +122,9 @@ static const char* g_version = "3.31"; #endif /* DECL_SPEC */ +#ifdef USE_LIBRETRO_VFS +#include "file_stream_transforms.h" +#endif /* ======================================================================== */ /* ============================== PROTOTYPES ============================== */ @@ -638,6 +641,9 @@ int get_oper_cycles(opcode_struct* op, int ea_mode, int cpu_type) strcmp(op->name, "suba") == 0)) return op->cycles[cpu_type] + g_ea_cycle_table[ea_mode][cpu_type][size] + 2; + if(cpu_type == CPU_TYPE_000 && ea_mode == EA_MODE_I && op->size == 8 && strcmp(op->name, "btst") == 0) + return op->cycles[cpu_type] + g_ea_cycle_table[ea_mode][cpu_type][size] + 2; + if(strcmp(op->name, "jmp") == 0) return op->cycles[cpu_type] + g_jmp_cycle_table[ea_mode]; if(strcmp(op->name, "jsr") == 0) @@ -1043,6 +1049,11 @@ void process_opcode_handlers(FILE* filep) if(opinfo == NULL) error_exit("Unable to find matching table entry for %s", func_name); +#if 1 /* PD hack: 000 only */ + if (opinfo->cpus[0] == UNSPECIFIED_CH) + continue; +#endif + replace->length = 0; /* Generate opcode variants */ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 25ba9d2f..a5d5c879 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1,26 +1,30 @@ /* * SH2 recompiler * (C) notaz, 2009,2010,2013 + * (C) irixxxx, 2018-2024 * * This work is licensed under the terms of MAME license. 
* See COPYING file in the top-level directory. * * notes: - * - tcache, block descriptor, link buffer overflows result in sh2_translate() - * failure, followed by full tcache invalidation for that region + * - tcache, block descriptor, block entry buffer overflows result in oldest + * blocks being deleted until enough space is available + * - link and list element buffer overflows result in failure and exit * - jumps between blocks are tracked for SMC handling (in block_entry->links), - * except jumps between different tcaches + * except jumps from global to CPU-local tcaches * * implemented: * - static register allocation * - remaining register caching and tracking in temporaries * - block-local branch linking - * - block linking (except between tcaches) + * - block linking * - some constant propagation + * - call stack caching for host block entry address + * - delay, poll, and idle loop detection and handling + * - some T/M flag optimizations where the value is known or isn't used * * TODO: * - better constant propagation - * - stack caching? * - bug fixing */ #include @@ -28,7 +32,8 @@ #include #include -#include "../../pico/pico_int.h" +#include +#include #include "sh2.h" #include "compiler.h" #include "../drc/cmn.h" @@ -37,23 +42,34 @@ // features #define PROPAGATE_CONSTANTS 1 #define LINK_BRANCHES 1 +#define BRANCH_CACHE 1 +#define CALL_STACK 1 +#define ALIAS_REGISTERS 1 +#define REMAP_REGISTER 1 +#define LOOP_DETECTION 1 +#define LOOP_OPTIMIZER 1 +#define T_OPTIMIZER 1 +#define DIV_OPTIMIZER 1 -// limits (per block) -#define MAX_BLOCK_SIZE (BLOCK_INSN_LIMIT * 6 * 6) - -// max literal offset from the block end -#define MAX_LITERAL_OFFSET 32*2 -#define MAX_LITERALS (BLOCK_INSN_LIMIT / 4) -#define MAX_LOCAL_BRANCHES 32 +#define MAX_LITERAL_OFFSET 0x200 // max. 
MOVA, MOV @(PC) offset +#define MAX_LOCAL_TARGETS (BLOCK_INSN_LIMIT / 4) +#define MAX_LOCAL_BRANCHES (BLOCK_INSN_LIMIT / 2) // debug stuff -// 1 - warnings/errors -// 2 - block info/smc -// 4 - asm -// 8 - runtime block entry log -// { +// 01 - warnings/errors +// 02 - block info/smc +// 04 - asm +// 08 - runtime block entry log +// 10 - smc self-check +// 20 - runtime block entry counter +// 40 - rcache checking +// 80 - branch cache statistics +// 100 - write trace +// 200 - compare trace +// 400 - block entry backtrace on exit +// 800 - state dump on exit #ifndef DRC_DEBUG -#define DRC_DEBUG 0 +#define DRC_DEBUG 0//x847 #endif #if DRC_DEBUG @@ -71,6 +87,7 @@ static int insns_compiled, hash_collisions, host_insn_count; #define dbg(...) #endif + /// #define FETCH_OP(pc) \ dr_pc_base[(pc) / 2] @@ -91,13 +108,17 @@ static int insns_compiled, hash_collisions, host_insn_count; #define GET_Rn() \ ((op >> 8) & 0x0f) -#define BITMASK1(v0) (1 << (v0)) -#define BITMASK2(v0,v1) ((1 << (v0)) | (1 << (v1))) -#define BITMASK3(v0,v1,v2) (BITMASK2(v0,v1) | (1 << (v2))) -#define BITMASK4(v0,v1,v2,v3) (BITMASK3(v0,v1,v2) | (1 << (v3))) -#define BITMASK5(v0,v1,v2,v3,v4) (BITMASK4(v0,v1,v2,v3) | (1 << (v4))) +#define T 0x00000001 +#define S 0x00000002 +#define I 0x000000f0 +#define Q 0x00000100 +#define M 0x00000200 +#define T_save 0x00000800 -#define SHR_T SHR_SR // might make them separate someday +#define I_SHIFT 4 +#define Q_SHIFT 8 +#define M_SHIFT 9 +#define T_SHIFT 11 static struct op_data { u8 op; @@ -113,287 +134,454 @@ static struct op_data { enum op_types { OP_UNHANDLED = 0, OP_BRANCH, + OP_BRANCH_N, // conditional known not to be taken OP_BRANCH_CT, // conditional, branch if T set OP_BRANCH_CF, // conditional, branch if T clear OP_BRANCH_R, // indirect OP_BRANCH_RF, // indirect far (PC + Rm) OP_SETCLRT, // T flag set/clear OP_MOVE, // register move + OP_LOAD_CONST,// load const to register OP_LOAD_POOL, // literal pool load, imm is address - OP_MOVA, - OP_SLEEP, - 
OP_RTE, + OP_MOVA, // MOVA instruction + OP_SLEEP, // SLEEP instruction + OP_RTE, // RTE instruction + OP_TRAPA, // TRAPA instruction + OP_LDC, // LDC instruction + OP_DIV0, // DIV0[US] instruction + OP_UNDEFINED, }; -#ifdef DRC_SH2 +struct div { + u32 state:1; // 0: expect DIV1/ROTCL, 1: expect DIV1 + u32 rn:5, rm:5, ro:5; // rn and rm for DIV1, ro for ROTCL + u32 div1:8, rotcl:8; // DIV1 count, ROTCL count +}; +union _div { u32 imm; struct div div; }; // XXX tut-tut type punning... +#define div(opd) ((union _div *)&((opd)->imm))->div -static int literal_disabled_frames; +// XXX consider trap insns: OP_TRAPA, OP_UNDEFINED? +#define OP_ISBRANCH(op) ((BITRANGE(OP_BRANCH, OP_BRANCH_RF)| BITMASK1(OP_RTE)) \ + & BITMASK1(op)) +#define OP_ISBRAUC(op) (BITMASK4(OP_BRANCH, OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \ + & BITMASK1(op)) +#define OP_ISBRACND(op) (BITMASK2(OP_BRANCH_CT, OP_BRANCH_CF) \ + & BITMASK1(op)) +#define OP_ISBRAIMM(op) (BITMASK3(OP_BRANCH, OP_BRANCH_CT, OP_BRANCH_CF) \ + & BITMASK1(op)) +#define OP_ISBRAIND(op) (BITMASK3(OP_BRANCH_R, OP_BRANCH_RF, OP_RTE) \ + & BITMASK1(op)) + +#ifdef DRC_SH2 #if (DRC_DEBUG & 4) static u8 *tcache_dsm_ptrs[3]; static char sh2dasm_buff[64]; #define do_host_disasm(tcid) \ - host_dasm(tcache_dsm_ptrs[tcid], tcache_ptr - tcache_dsm_ptrs[tcid]); \ - tcache_dsm_ptrs[tcid] = tcache_ptr + host_dasm(tcache_dsm_ptrs[tcid], emith_insn_ptr() - tcache_dsm_ptrs[tcid]); \ + tcache_dsm_ptrs[tcid] = emith_insn_ptr() #else #define do_host_disasm(x) #endif -#if (DRC_DEBUG & 8) || defined(PDB) +#define SH2_DUMP(sh2, reason) { \ + char ms = (sh2)->is_slave ? 
's' : 'm'; \ + printf("%csh2 %s %08lx\n", ms, reason, (ulong)(sh2)->pc); \ + printf("%csh2 r0-7 %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", ms, \ + (ulong)(sh2)->r[0], (ulong)(sh2)->r[1], (ulong)(sh2)->r[2], (ulong)(sh2)->r[3], \ + (ulong)(sh2)->r[4], (ulong)(sh2)->r[5], (ulong)(sh2)->r[6], (ulong)(sh2)->r[7]); \ + printf("%csh2 r8-15 %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", ms, \ + (ulong)(sh2)->r[8], (ulong)(sh2)->r[9], (ulong)(sh2)->r[10], (ulong)(sh2)->r[11], \ + (ulong)(sh2)->r[12], (ulong)(sh2)->r[13], (ulong)(sh2)->r[14], (ulong)(sh2)->r[15]); \ + printf("%csh2 pc-ml %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n", ms, \ + (ulong)(sh2)->pc, (ulong)(sh2)->ppc, (ulong)(sh2)->pr, (ulong)(sh2)->sr, \ + (ulong)(sh2)->gbr, (ulong)(sh2)->vbr, (ulong)(sh2)->mach, (ulong)(sh2)->macl); \ + printf("%csh2 tmp-p %08x %08x %08x %08x %08x %08lx %08x %08x\n", ms, \ + (sh2)->drc_tmp, (sh2)->irq_cycles, \ + (sh2)->pdb_io_csum[0], (sh2)->pdb_io_csum[1], (sh2)->state, \ + (ulong)(sh2)->poll_addr, (sh2)->poll_cycles, (sh2)->poll_cnt); \ +} + +#if (DRC_DEBUG & (256|512|1024)) +static SH2 csh2[2][8]; +static FILE *trace[2]; +static int topen[2]; +#endif +#if (DRC_DEBUG & 8) +static u32 lastpc, lastcnt; +static void *lastblock; +#endif +#if (DRC_DEBUG & (8|256|512|1024)) || defined(PDB) static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) { if (block != NULL) { - dbg(8, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 
's' : 'm', - sh2->pc, block, (signed int)sr >> 12); +#if defined PDB + dbg(8, "= %csh2 enter %08x %p, c=%d", sh2->is_slave?'s':'m', + sh2->pc, block, ((signed int)sr >> 12)+1); pdb_step(sh2, sh2->pc); +#elif (DRC_DEBUG & 8) + if (lastpc != sh2->pc) { + if (lastcnt) + dbg(8, "= %csh2 enter %08x %p (%d times), c=%d", sh2->is_slave?'s':'m', + lastpc, lastblock, lastcnt, (signed int)sr >> 12); + dbg(8, "= %csh2 enter %08x %p, c=%d", sh2->is_slave?'s':'m', + sh2->pc, block, (signed int)sr >> 12); + lastpc = sh2->pc; + lastblock = block; + lastcnt = 0; + } else + lastcnt++; +#elif (DRC_DEBUG & 256) + { + static SH2 fsh2; + int idx = sh2->is_slave; + if (!trace[0] && !topen[0]++) { + trace[0] = fopen("pico.trace0", "wb"); + trace[1] = fopen("pico.trace1", "wb"); + } + if (trace[idx] && csh2[idx][0].pc != sh2->pc) { + fwrite(sh2, offsetof(SH2, read8_map), 1, trace[idx]); + fwrite(&sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx]); + memcpy(&csh2[idx][0], sh2, offsetof(SH2, poll_cnt)+4); + csh2[idx][0].is_slave = idx; + } + } +#elif (DRC_DEBUG & 512) + { + static SH2 fsh2; + int idx = sh2->is_slave; + if (!trace[0] && !topen[0]++) { + trace[0] = fopen("pico.trace0", "rb"); + trace[1] = fopen("pico.trace1", "rb"); + } + if (trace[idx] && csh2[idx][0].pc != sh2->pc) { + if (!fread(&fsh2, offsetof(SH2, read8_map), 1, trace[idx]) || + !fread(&fsh2.pdb_io_csum, sizeof(sh2->pdb_io_csum), 1, trace[idx])) { + printf("trace eof at %08lx\n",ftell(trace[idx])); + exit(1); + } + fsh2.sr = (fsh2.sr & 0x3ff) | (sh2->sr & ~0x3ff); + fsh2.is_slave = idx; + if (memcmp(&fsh2, sh2, offsetof(SH2, read8_map)) || + 0)//memcmp(&fsh2.pdb_io_csum, &sh2->pdb_io_csum, sizeof(sh2->pdb_io_csum))) + { + printf("difference at %08lx!\n",ftell(trace[idx])); + SH2_DUMP(&fsh2, "file"); + SH2_DUMP(sh2, "current"); + SH2_DUMP(&csh2[idx][0], "previous"); + SH2_DUMP(&csh2[idx][1], "previous"); + char *ps = (char *)sh2, *pf = (char *)&fsh2; + for (idx = 0; idx < offsetof(SH2, read8_map); idx += 
sizeof(u32)) + if (*(u32 *)(ps+idx) != *(u32 *)(pf+idx)) + printf("diff reg %ld\n",(long)idx/sizeof(u32)); + exit(1); + } + memcpy(&csh2[idx][1], &csh2[idx][0], offsetof(SH2, poll_cnt)+4); + csh2[idx][0] = fsh2; + } + } +#elif (DRC_DEBUG & 1024) + { + int x = sh2->is_slave, i; + for (i = 0; i < ARRAY_SIZE(csh2[x])-1; i++) + memcpy(&csh2[x][i], &csh2[x][i+1], offsetof(SH2, poll_cnt)+4); + memcpy(&csh2[x][ARRAY_SIZE(csh2[x])-1], sh2, offsetof(SH2, poll_cnt)+4); + csh2[x][0].is_slave = x; + } +#endif } return block; } #endif -// } debug -#define TCACHE_BUFFERS 3 // we have 3 translation cache buffers, split from one drc/cmn buffer. // BIOS shares tcache with data array because it's only used for init // and can be discarded early -// XXX: need to tune sizes -static const int tcache_sizes[TCACHE_BUFFERS] = { - DRC_TCACHE_SIZE * 6 / 8, // ROM (rarely used), DRAM - DRC_TCACHE_SIZE / 8, // BIOS, data array in master sh2 - DRC_TCACHE_SIZE / 8, // ... slave +#define TCACHE_BUFFERS 3 + + +struct ring_buffer { + u8 *base; // ring buffer memory + unsigned item_sz; // size of one buffer item + unsigned size; // number of itmes in ring + int first, next; // read and write pointers + int used; // number of used items in ring }; -static u8 *tcache_bases[TCACHE_BUFFERS]; -static u8 *tcache_ptrs[TCACHE_BUFFERS]; - -// ptr for code emiters -static u8 *tcache_ptr; - -#define MAX_BLOCK_ENTRIES (BLOCK_INSN_LIMIT / 8) - +enum { BL_JMP=1, BL_LDJMP, BL_JCCBLX }; struct block_link { + short tcache_id; + short type; // BL_JMP et al u32 target_pc; void *jump; // insn address - struct block_link *next; // either in block_entry->links or + void *blx; // block link/exit area if any + u8 jdisp[12]; // jump backup buffer + struct block_link *next; // either in block_entry->links or unresolved + struct block_link *o_next; // ...in block_entry->o_links + struct block_link *prev; + struct block_link *o_prev; + struct block_entry *target;// target block this is linked in (be->links) }; struct 
block_entry { u32 pc; - void *tcache_ptr; // translated block for above PC - struct block_entry *next; // next block in hash_table with same pc hash - struct block_link *links; // links to this entry + u8 *tcache_ptr; // translated block for above PC + struct block_entry *next; // chain in hash_table with same pc hash + struct block_entry *prev; + struct block_link *links; // incoming links to this entry + struct block_link *o_links;// outgoing links from this entry #if (DRC_DEBUG & 2) struct block_desc *block; #endif +#if (DRC_DEBUG & 32) + int entry_count; +#endif }; struct block_desc { u32 addr; // block start SH2 PC address - u16 size; // ..of recompiled insns+lit. pool - u16 size_nolit; // same without literals + u32 addr_lit; // block start SH2 literal pool addr + int size; // ..of recompiled insns + int size_lit; // ..of (insns+)literal pool + u8 *tcache_ptr; // start address of block in cache + u16 crc; // crc of insns and literals + u16 active; // actively used or deactivated? + struct block_list *list; #if (DRC_DEBUG & 2) int refcount; #endif int entry_count; - struct block_entry entryp[MAX_BLOCK_ENTRIES]; + struct block_entry *entryp; }; -static const int block_max_counts[TCACHE_BUFFERS] = { - 4*1024, - 256, - 256, -}; -static struct block_desc *block_tables[TCACHE_BUFFERS]; -static int block_counts[TCACHE_BUFFERS]; - -// we have block_link_pool to avoid using mallocs -static const int block_link_pool_max_counts[TCACHE_BUFFERS] = { - 4*1024, - 256, - 256, -}; -static struct block_link *block_link_pool[TCACHE_BUFFERS]; -static int block_link_pool_counts[TCACHE_BUFFERS]; -static struct block_link *unresolved_links[TCACHE_BUFFERS]; - -// used for invalidation -static const int ram_sizes[TCACHE_BUFFERS] = { - 0x40000, - 0x1000, - 0x1000, -}; -#define INVAL_PAGE_SIZE 0x100 - struct block_list { - struct block_desc *block; - struct block_list *next; + struct block_desc *block; // block reference + struct block_list *next; // pointers for doubly linked list + 
struct block_list *prev; + struct block_list **head; // list head (for removing from list) + struct block_list *l_next; }; +static u8 *tcache_ptr; // ptr for code emitters + +// XXX: need to tune sizes + +static struct ring_buffer tcache_ring[TCACHE_BUFFERS]; +static const int tcache_sizes[TCACHE_BUFFERS] = { + DRC_TCACHE_SIZE * 30 / 32, // ROM (rarely used), DRAM + DRC_TCACHE_SIZE / 32, // BIOS, data array in master sh2 + DRC_TCACHE_SIZE / 32, // ... slave +}; + +#define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 32*256) +static struct ring_buffer block_ring[TCACHE_BUFFERS]; +static struct block_desc *block_tables[TCACHE_BUFFERS]; + +#define ENTRY_MAX_COUNT(tcid) ((tcid) ? 8*512 : 256*512) +static struct ring_buffer entry_ring[TCACHE_BUFFERS]; +static struct block_entry *entry_tables[TCACHE_BUFFERS]; + +// we have block_link_pool to avoid using mallocs +#define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 512 : 32*512) +static struct block_link *block_link_pool[TCACHE_BUFFERS]; +static int block_link_pool_counts[TCACHE_BUFFERS]; +static struct block_link **unresolved_links[TCACHE_BUFFERS]; +static struct block_link *blink_free[TCACHE_BUFFERS]; + +// used for invalidation +#define RAM_SIZE(tcid) ((tcid) ? 0x1000 : 0x40000) +#define INVAL_PAGE_SIZE 0x100 + +static struct block_list *inactive_blocks[TCACHE_BUFFERS]; + // array of pointers to block_lists for RAM and 2 data arrays // each array has len: sizeof(mem) / INVAL_PAGE_SIZE static struct block_list **inval_lookup[TCACHE_BUFFERS]; -static const int hash_table_sizes[TCACHE_BUFFERS] = { - 0x1000, - 0x100, - 0x100, -}; +#define HASH_TABLE_SIZE(tcid) ((tcid) ? 
512 : 32*512) static struct block_entry **hash_tables[TCACHE_BUFFERS]; #define HASH_FUNC(hash_tab, addr, mask) \ - (hash_tab)[(((addr) >> 20) ^ ((addr) >> 2)) & (mask)] + (hash_tab)[((addr) >> 1) & (mask)] + +#define BLOCK_LIST_MAX_COUNT (64*1024) +static struct block_list *block_list_pool; +static int block_list_pool_count; +static struct block_list *blist_free; + +#if (DRC_DEBUG & 128) +#if BRANCH_CACHE +int bchit, bcmiss; +#endif +#if CALL_STACK +int rchit, rcmiss; +#endif +#endif // host register tracking -enum { +enum cache_reg_htype { + HRT_TEMP = 1, // is for temps and args + HRT_REG = 2, // is for sh2 regs +}; + +enum cache_reg_flags { + HRF_DIRTY = 1 << 0, // has "dirty" value to be written to ctx + HRF_PINNED = 1 << 1, // has a pinned mapping + HRF_S16 = 1 << 2, // has a sign extended 16 bit value + HRF_U16 = 1 << 3, // has a zero extended 16 bit value +}; + +enum cache_reg_type { HR_FREE, - HR_CACHED, // 'val' has sh2_reg_e -// HR_CONST, // 'val' has a constant + HR_CACHED, // vreg has sh2_reg_e HR_TEMP, // reg used for temp storage }; -enum { - HRF_DIRTY = 1 << 0, // reg has "dirty" value to be written to ctx - HRF_LOCKED = 1 << 1, // HR_CACHED can't be evicted +typedef struct { + u8 hreg:6; // "host" reg + u8 htype:2; // TEMP or REG? + u8 flags:4; // DIRTY, PINNED? + u8 type:2; // CACHED or TEMP? 
+ u8 locked:2; // LOCKED reference counter + u16 stamp; // kind of a timestamp + u32 gregs; // "guest" reg mask +} cache_reg_t; + +// guest register tracking +enum guest_reg_flags { + GRF_DIRTY = 1 << 0, // reg has "dirty" value to be written to ctx + GRF_CONST = 1 << 1, // reg has a constant + GRF_CDIRTY = 1 << 2, // constant not yet written to ctx + GRF_STATIC = 1 << 3, // reg has static mapping to vreg + GRF_PINNED = 1 << 4, // reg has pinned mapping to vreg }; typedef struct { - u32 hreg:5; // "host" reg - u32 greg:5; // "guest" reg - u32 type:3; - u32 flags:3; - u32 stamp:16; // kind of a timestamp -} temp_reg_t; + u8 flags; // guest flags: is constant, is dirty? + s8 sreg; // cache reg for static mapping + s8 vreg; // cache_reg this is currently mapped to, -1 if not mapped + s8 cnst; // const index if this is constant +} guest_reg_t; -// note: reg_temp[] must have at least the amount of -// registers used by handlers in worst case (currently 4) -#ifdef __arm__ + +// possibly needed in code emitter +static int rcache_get_tmp(void); +static void rcache_free_tmp(int hr); + +// Note: Register assignment goes by ABI convention. Caller save registers are +// TEMPORARY, callee save registers are PRESERVED. Unusable regs are omitted. +// there must be at least the free (not context or statically mapped) amount of +// PRESERVED/TEMPORARY registers used by handlers in worst case (currently 4). +// there must be at least 3 PARAM, and PARAM+TEMPORARY must be at least 4. +// SR must and R0 should by all means be statically mapped. +// XXX the static definition of SR MUST match that in compiler.h + +#if defined(__arm__) || defined(_M_ARM) #include "../drc/emit_arm.c" - -#ifndef __MACH__ - -static const int reg_map_g2h[] = { - 4, 5, 6, 7, - 8, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, 9, // r12 .. sp - -1, -1, -1, 10, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, -}; - -#else - -// no r9.. 
-static const int reg_map_g2h[] = { - 4, 5, 6, 7, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, 8, // r12 .. sp - -1, -1, -1, 10, // SHR_PC, SHR_PPC, SHR_PR, SHR_SR, - -1, -1, -1, -1, // SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, -}; - -#endif - -static temp_reg_t reg_temp[] = { - { 0, }, - { 1, }, - { 12, }, - { 14, }, - { 2, }, - { 3, }, -}; - -#elif defined(__i386__) +#elif defined(__aarch64__) || defined(_M_ARM64) +#include "../drc/emit_arm64.c" +#elif defined(__mips__) +#include "../drc/emit_mips.c" +#elif defined(__riscv__) || defined(__riscv) +#include "../drc/emit_riscv.c" +#elif defined(__powerpc__) || defined(__PPC__) || defined(__ppc__) || defined(_M_PPC) +#include "../drc/emit_ppc.c" +#elif defined(__i386__) || defined(_M_X86) +#include "../drc/emit_x86.c" +#elif defined(__x86_64__) || defined(_M_X64) #include "../drc/emit_x86.c" - -static const int reg_map_g2h[] = { - xSI,-1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, -1, - -1, -1, -1, xDI, - -1, -1, -1, -1, -}; - -// ax, cx, dx are usually temporaries by convention -static temp_reg_t reg_temp[] = { - { xAX, }, - { xBX, }, - { xCX, }, - { xDX, }, -}; - #else #error unsupported arch #endif -#define T 0x00000001 -#define S 0x00000002 -#define I 0x000000f0 -#define Q 0x00000100 -#define M 0x00000200 -#define T_save 0x00000800 +static const signed char hregs_param[] = PARAM_REGS; +static const signed char hregs_temp [] = TEMPORARY_REGS; +static const signed char hregs_saved[] = PRESERVED_REGS; +static const signed char regs_static[] = STATIC_SH2_REGS; -#define I_SHIFT 4 -#define Q_SHIFT 8 -#define M_SHIFT 9 +#define CACHE_REGS \ + (ARRAY_SIZE(hregs_param)+ARRAY_SIZE(hregs_temp)+ARRAY_SIZE(hregs_saved)-1) +static cache_reg_t cache_regs[CACHE_REGS]; +static signed char reg_map_host[HOST_REGS]; + +static guest_reg_t guest_regs[SH2_REGS]; + +// generated functions called from C, to be called only through host_call() static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2); -static void 
(*sh2_drc_dispatcher)(void); -static void (*sh2_drc_exit)(void); +#ifdef DRC_SR_REG +void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2); +void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2); +#endif + +// generated DRC helper functions, only called from generated code via emith_call*() +static void REGPARM(1) (*sh2_drc_dispatcher)(u32 pc); +#if CALL_STACK +static u32 REGPARM(2) (*sh2_drc_dispatcher_call)(u32 pc); +static void REGPARM(1) (*sh2_drc_dispatcher_return)(u32 pc); +#endif +static void REGPARM(1) (*sh2_drc_exit)(u32 pc); static void (*sh2_drc_test_irq)(void); -static u32 REGPARM(2) (*sh2_drc_read8)(u32 a, SH2 *sh2); -static u32 REGPARM(2) (*sh2_drc_read16)(u32 a, SH2 *sh2); -static u32 REGPARM(2) (*sh2_drc_read32)(u32 a, SH2 *sh2); +static u32 REGPARM(1) (*sh2_drc_read8)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read16)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read32)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read8_poll)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read16_poll)(u32 a); +static u32 REGPARM(1) (*sh2_drc_read32_poll)(u32 a); static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d); static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d); -static void REGPARM(3) (*sh2_drc_write32)(u32 a, u32 d, SH2 *sh2); +static void REGPARM(2) (*sh2_drc_write32)(u32 a, u32 d); + +// flags for memory access +#define MF_SIZEMASK 0x03 // size of access +#define MF_POSTINCR 0x10 // post increment (for read_rr) +#define MF_PREDECR MF_POSTINCR // pre decrement (for write_rr) +#define MF_POLLING 0x20 // include polling check in read // address space stuff -static int dr_ctx_get_mem_ptr(u32 a, u32 *mask) +static int dr_ctx_get_mem_ptr(SH2 *sh2, u32 a, u32 *mask) { + void *memptr; int poffs = -1; - if ((a & ~0x7ff) == 0) { - // BIOS + // check if region is mapped memory + memptr = p32x_sh2_get_mem_ptr(a, mask, sh2); + if (memptr == NULL) + return poffs; + + if (memptr == sh2->p_bios) // BIOS poffs = offsetof(SH2, p_bios); - *mask = 0x7ff; - } - else if ((a & 0xfffff000) == 0xc0000000) 
{ - // data array - // FIXME: access sh2->data_array instead + else if (memptr == sh2->p_da) // data array poffs = offsetof(SH2, p_da); - *mask = 0xfff; - } - else if ((a & 0xc6000000) == 0x06000000) { - // SDRAM + else if (memptr == sh2->p_sdram) // SDRAM poffs = offsetof(SH2, p_sdram); - *mask = 0x03ffff; - } - else if ((a & 0xc6000000) == 0x02000000) { - // ROM + else if (memptr == sh2->p_rom) // ROM poffs = offsetof(SH2, p_rom); - *mask = 0x3fffff; - } return poffs; } +static int dr_get_tcache_id(u32 pc, int is_slave) +{ + u32 tcid = 0; + + if ((pc & 0xe0000000) == 0xc0000000) + tcid = 1 + is_slave; // data array + if ((pc & ~0xfff) == 0) + tcid = 1 + is_slave; // BIOS + return tcid; +} + static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id) { struct block_entry *be; - u32 tcid = 0, mask; + + *tcache_id = dr_get_tcache_id(pc, is_slave); - // data arrays have their own caches - if ((pc & 0xe0000000) == 0xc0000000 || (pc & ~0xfff) == 0) - tcid = 1 + is_slave; - - *tcache_id = tcid; - - mask = hash_table_sizes[tcid] - 1; - be = HASH_FUNC(hash_tables[tcid], pc, mask); + be = HASH_FUNC(hash_tables[*tcache_id], pc, HASH_TABLE_SIZE(*tcache_id) - 1); + if (be != NULL) // don't ask... 
gcc code generation hint for (; be != NULL; be = be->next) if (be->pc == pc) return be; @@ -403,87 +591,148 @@ static struct block_entry *dr_get_entry(u32 pc, int is_slave, int *tcache_id) // --------------------------------------------------------------- +// ring buffer management +#define RING_INIT(r,m,n) *(r) = (struct ring_buffer) { .base = (u8 *)m, \ + .item_sz = sizeof(*(m)), .size = n }; + +static void *ring_alloc(struct ring_buffer *rb, int count) +{ + // allocate space in ring buffer + void *p; + + p = rb->base + rb->next * rb->item_sz; + if (rb->next+count > rb->size) { + rb->used += rb->size - rb->next; + p = rb->base; // wrap if overflow at end + rb->next = count; + } else { + rb->next += count; + if (rb->next == rb->size) rb->next = 0; + } + + rb->used += count; + return p; +} + +static void ring_wrap(struct ring_buffer *rb) +{ + // insufficient space at end of buffer memory, wrap around + rb->used += rb->size - rb->next; + rb->next = 0; +} + +static void ring_free(struct ring_buffer *rb, int count) +{ + // free oldest space in ring buffer + rb->first += count; + if (rb->first >= rb->size) rb->first -= rb->size; + + rb->used -= count; +} + +static void ring_free_p(struct ring_buffer *rb, void *p) +{ + // free ring buffer space upto given pointer + rb->first = ((u8 *)p - rb->base) / rb->item_sz; + + rb->used = rb->next - rb->first; + if (rb->used < 0) rb->used += rb->size; +} + +static void *ring_reset(struct ring_buffer *rb) +{ + // reset to initial state + rb->first = rb->next = rb->used = 0; + return rb->base + rb->next * rb->item_sz; +} + +static void *ring_first(struct ring_buffer *rb) +{ + return rb->base + rb->first * rb->item_sz; +} + +static void *ring_next(struct ring_buffer *rb) +{ + return rb->base + rb->next * rb->item_sz; +} + + // block management static void add_to_block_list(struct block_list **blist, struct block_desc *block) { - struct block_list *added = malloc(sizeof(*added)); - if (!added) { - elprintf(EL_ANOMALY, "drc OOM (1)"); - 
return; + struct block_list *added; + + if (blist_free) { + added = blist_free; + blist_free = added->next; + } else if (block_list_pool_count >= BLOCK_LIST_MAX_COUNT) { + printf( "block list overflow\n"); + exit(1); + } else { + added = block_list_pool + block_list_pool_count; + block_list_pool_count ++; } + added->block = block; + added->l_next = block->list; + block->list = added; + added->head = blist; + + added->prev = NULL; + if (*blist) + (*blist)->prev = added; added->next = *blist; *blist = added; } -static void rm_from_block_list(struct block_list **blist, struct block_desc *block) +static void rm_from_block_lists(struct block_desc *block) { - struct block_list *prev = NULL, *current = *blist; - for (; current != NULL; prev = current, current = current->next) { - if (current->block == block) { - if (prev == NULL) - *blist = current->next; - else - prev->next = current->next; - free(current); - return; - } + struct block_list *entry; + + entry = block->list; + while (entry != NULL) { + if (entry->prev != NULL) + entry->prev->next = entry->next; + else + *(entry->head) = entry->next; + if (entry->next != NULL) + entry->next->prev = entry->prev; + + entry->next = blist_free; + blist_free = entry; + + entry = entry->l_next; } - dbg(1, "can't rm block %p (%08x-%08x)", - block, block->addr, block->addr + block->size); + block->list = NULL; } -static void rm_block_list(struct block_list **blist) +static void discard_block_list(struct block_list **blist) { - struct block_list *tmp, *current = *blist; + struct block_list *next, *current = *blist; while (current != NULL) { - tmp = current; - current = current->next; - free(tmp); + next = current->next; + current->next = blist_free; + blist_free = current; + current = next; } *blist = NULL; } -static void REGPARM(1) flush_tcache(int tcid) -{ - int i; - - dbg(1, "tcache #%d flush! 
(%d/%d, bds %d/%d)", tcid, - tcache_ptrs[tcid] - tcache_bases[tcid], tcache_sizes[tcid], - block_counts[tcid], block_max_counts[tcid]); - - block_counts[tcid] = 0; - block_link_pool_counts[tcid] = 0; - unresolved_links[tcid] = NULL; - memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * hash_table_sizes[tcid]); - tcache_ptrs[tcid] = tcache_bases[tcid]; - if (Pico32xMem != NULL) { - if (tcid == 0) // ROM, RAM - memset(Pico32xMem->drcblk_ram, 0, - sizeof(Pico32xMem->drcblk_ram)); - else - memset(Pico32xMem->drcblk_da[tcid - 1], 0, - sizeof(Pico32xMem->drcblk_da[0])); - } -#if (DRC_DEBUG & 4) - tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; -#endif - - for (i = 0; i < ram_sizes[tcid] / INVAL_PAGE_SIZE; i++) - rm_block_list(&inval_lookup[tcid][i]); -} - static void add_to_hashlist(struct block_entry *be, int tcache_id) { - u32 tcmask = hash_table_sizes[tcache_id] - 1; + u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1; + struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); - be->next = HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); - HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask) = be; + be->prev = NULL; + if (*head) + (*head)->prev = be; + be->next = *head; + *head = be; #if (DRC_DEBUG & 2) if (be->next != NULL) { - printf(" %08x: hash collision with %08x\n", - be->pc, be->next->pc); + printf(" %08lx@%p: entry hash collision with %08lx@%p\n", + (ulong)be->pc, be->tcache_ptr, (ulong)be->next->pc, be->next->tcache_ptr); hash_collisions++; } #endif @@ -491,75 +740,457 @@ static void add_to_hashlist(struct block_entry *be, int tcache_id) static void rm_from_hashlist(struct block_entry *be, int tcache_id) { - u32 tcmask = hash_table_sizes[tcache_id] - 1; - struct block_entry *cur, *prev; - - cur = HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); - if (cur == NULL) - goto missing; + u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1; + struct block_entry **head = &HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask); - if (be == cur) { // first - 
HASH_FUNC(hash_tables[tcache_id], be->pc, tcmask) = be->next; +#if DRC_DEBUG & 1 + struct block_entry *current = be; + while (current->prev != NULL) + current = current->prev; + if (current != *head) + dbg(1, "rm_from_hashlist @%p: be %p %08x missing?", head, be, be->pc); +#endif + + if (be->prev != NULL) + be->prev->next = be->next; + else + *head = be->next; + if (be->next != NULL) + be->next->prev = be->prev; +} + + +#if LINK_BRANCHES +static void add_to_hashlist_unresolved(struct block_link *bl, int tcache_id) +{ + u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1; + struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask); + +#if DRC_DEBUG & 1 + struct block_link *current = *head; + while (current != NULL && current != bl) + current = current->next; + if (current == bl) + dbg(1, "add_to_hashlist_unresolved @%p: bl %p %p %08x already in?", head, bl, bl->target, bl->target_pc); +#endif + + bl->target = NULL; // marker for not resolved + bl->prev = NULL; + if (*head) + (*head)->prev = bl; + bl->next = *head; + *head = bl; +} + +static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id) +{ + u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1; + struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], bl->target_pc, tcmask); + +#if DRC_DEBUG & 1 + struct block_link *current = bl; + while (current->prev != NULL) + current = current->prev; + if (current != *head) + dbg(1, "rm_from_hashlist_unresolved @%p: bl %p %p %08x missing?", head, bl, bl->target, bl->target_pc); +#endif + + if (bl->prev != NULL) + bl->prev->next = bl->next; + else + *head = bl->next; + if (bl->next != NULL) + bl->next->prev = bl->prev; +} + +static void dr_block_link(struct block_entry *be, struct block_link *bl, int emit_jump) +{ + dbg(2, "- %slink from %p to pc %08x entry %p", emit_jump ? 
"":"early ", + bl->jump, bl->target_pc, be->tcache_ptr); + + if (emit_jump) { + u8 *jump = bl->jump; + int jsz = emith_jump_patch_size(); + if (bl->type == BL_JMP) { // patch: jump @entry + // inlined: @jump far jump to target + emith_jump_patch(jump, be->tcache_ptr, &jump); + } else if (bl->type == BL_LDJMP) { // write: jump @entry + // inlined: @jump far jump to target + emith_jump_at(jump, be->tcache_ptr); + jsz = emith_jump_at_size(); + } else if (bl->type == BL_JCCBLX) { // patch: jump cond -> jump @entry + if (emith_jump_patch_inrange(bl->jump, be->tcache_ptr)) { + // inlined: @jump near jumpcc to target + emith_jump_patch(jump, be->tcache_ptr, &jump); + } else { // dispatcher cond immediate + // via blx: @jump near jumpcc to blx; @blx far jump + emith_jump_patch(jump, bl->blx, &jump); + emith_jump_at(bl->blx, be->tcache_ptr); + host_instructions_updated(bl->blx, (char *)bl->blx + emith_jump_at_size(), + ((uintptr_t)bl->blx & 0x1f) + emith_jump_at_size()-1 > 0x1f); + } + } else { + printf("unknown BL type %d\n", bl->type); + exit(1); + } + host_instructions_updated(jump, jump + jsz, ((uintptr_t)jump & 0x1f) + jsz-1 > 0x1f); + } + + // move bl to block_entry + bl->target = be; + bl->prev = NULL; + if (be->links) + be->links->prev = bl; + bl->next = be->links; + be->links = bl; +} + +static void dr_block_unlink(struct block_link *bl, int emit_jump) +{ + dbg(2,"- unlink from %p to pc %08x", bl->jump, bl->target_pc); + + if (bl->target) { + if (emit_jump) { + u8 *jump = bl->jump; + int jsz = emith_jump_patch_size(); + if (bl->type == BL_JMP) { // jump_patch @dispatcher + // inlined: @jump far jump to dispatcher + emith_jump_patch(jump, sh2_drc_dispatcher, &jump); + } else if (bl->type == BL_LDJMP) { // restore: load pc, jump @dispatcher + // inlined: @jump load target_pc, far jump to dispatcher + memcpy(jump, bl->jdisp, emith_jump_at_size()); + jsz = emith_jump_at_size(); + } else if (bl->type == BL_JCCBLX) { // jump cond @blx; @blx: load pc, jump + // via blx: 
@jump near jumpcc to blx; @blx load target_pc, far jump + emith_jump_patch(bl->jump, bl->blx, &jump); + memcpy(bl->blx, bl->jdisp, emith_jump_at_size()); + host_instructions_updated(bl->blx, (char *)bl->blx + emith_jump_at_size(), 1); + } else { + printf("unknown BL type %d\n", bl->type); + exit(1); + } + // update cpu caches since the previous jump target doesn't exist anymore + host_instructions_updated(jump, jump + jsz, 1); + } + + if (bl->prev) + bl->prev->next = bl->next; + else + bl->target->links = bl->next; + if (bl->next) + bl->next->prev = bl->prev; + bl->target = NULL; + } +} +#endif + +static struct block_link *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id) +{ +#if LINK_BRANCHES + struct block_link *bl = block_link_pool[tcache_id]; + int cnt = block_link_pool_counts[tcache_id]; + int target_tcache_id; + + // get the target block entry + target_tcache_id = dr_get_tcache_id(pc, is_slave); + if (target_tcache_id && target_tcache_id != tcache_id) + return NULL; + + // get a block link + if (blink_free[tcache_id] != NULL) { + bl = blink_free[tcache_id]; + blink_free[tcache_id] = bl->next; + } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) { + dbg(1, "bl overflow for tcache %d", tcache_id); + return NULL; + } else { + bl += cnt; + block_link_pool_counts[tcache_id] = cnt+1; + } + + // prepare link and add to outgoing list of owner + bl->tcache_id = tcache_id; + bl->target_pc = pc; + bl->jump = tcache_ptr; + bl->blx = NULL; + bl->o_next = owner->o_links; + owner->o_links = bl; + + add_to_hashlist_unresolved(bl, tcache_id); + return bl; +#else + return NULL; +#endif +} + +static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u32 nolit) +{ + u8 *drc_ram_blk = NULL, *lit_ram_blk = NULL; + u32 addr, end, mask = 0, shift = 0, idx; + + // mark memory blocks as containing compiled code + if ((block->addr & 0xc7fc0000) == 0x06000000 + || (block->addr & 0xfffff000) == 0xc0000000) + { + if (tcache_id != 0) 
{ + // data array + drc_ram_blk = Pico32xMem->drcblk_da[tcache_id-1]; + lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; + shift = SH2_DRCBLK_DA_SHIFT; + } + else { + // SDRAM + drc_ram_blk = Pico32xMem->drcblk_ram; + lit_ram_blk = Pico32xMem->drclit_ram; + shift = SH2_DRCBLK_RAM_SHIFT; + } + mask = RAM_SIZE(tcache_id) - 1; + + // mark recompiled insns + addr = block->addr & ~((1 << shift) - 1); + end = block->addr + block->size; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + drc_ram_blk[idx++] += mark; + + // mark literal pool + if (addr < (block->addr_lit & ~((1 << shift) - 1))) + addr = block->addr_lit & ~((1 << shift) - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + drc_ram_blk[idx++] += mark; + + // mark for literals disabled + if (nolit) { + addr = nolit & ~((1 << shift) - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + lit_ram_blk[idx++] = 1; + } + + if (mark < 0) + rm_from_block_lists(block); + else { + // add to invalidation lookup lists + addr = block->addr & ~(INVAL_PAGE_SIZE - 1); + end = block->addr + block->size; + for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) + add_to_block_list(&inval_lookup[tcache_id][idx++], block); + + if (addr < (block->addr_lit & ~(INVAL_PAGE_SIZE - 1))) + addr = block->addr_lit & ~(INVAL_PAGE_SIZE - 1); + end = block->addr_lit + block->size_lit; + for (idx = (addr & mask) / INVAL_PAGE_SIZE; addr < end; addr += INVAL_PAGE_SIZE) + add_to_block_list(&inval_lookup[tcache_id][idx++], block); + } + } +} + +static u32 dr_check_nolit(u32 start, u32 end, int tcache_id) +{ + u8 *lit_ram_blk = NULL; + u32 mask = 0, shift = 0, addr, idx; + + if ((start & 0xc7fc0000) == 0x06000000 + || (start & 0xfffff000) == 0xc0000000) + { + if (tcache_id != 0) { + // data array + lit_ram_blk = Pico32xMem->drclit_da[tcache_id-1]; + shift = 
SH2_DRCBLK_DA_SHIFT; + } + else { + // SDRAM + lit_ram_blk = Pico32xMem->drclit_ram; + shift = SH2_DRCBLK_RAM_SHIFT; + } + mask = RAM_SIZE(tcache_id) - 1; + + addr = start & ~((1 << shift) - 1); + for (idx = (addr & mask) >> shift; addr < end; addr += (1 << shift)) + if (lit_ram_blk[idx++]) + break; + + return (addr < start ? start : addr > end ? end : addr); + } + + return end; +} + +static void dr_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free) +{ + struct block_link *bl; + u32 i; + + free = free || nolit; // block is invalid if literals are overwritten + dbg(2," %sing block %08x-%08x,%08x-%08x, blkid %d,%d", free?"delet":"disabl", + bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit, + tcache_id, bd - block_tables[tcache_id]); + if (bd->addr == 0 || bd->entry_count == 0) { + dbg(1, " killing dead block!? %08x", bd->addr); return; } - for (prev = cur, cur = cur->next; cur != NULL; cur = cur->next) { - if (cur == be) { - prev->next = cur->next; - return; - } - } + // remove from hash table, make incoming links unresolved + if (bd->active) { + for (i = 0; i < bd->entry_count; i++) { + rm_from_hashlist(&bd->entryp[i], tcache_id); -missing: - dbg(1, "rm_from_hashlist: be %p %08x missing?", be, be->pc); +#if LINK_BRANCHES + while ((bl = bd->entryp[i].links) != NULL) { + dr_block_unlink(bl, 1); + add_to_hashlist_unresolved(bl, tcache_id); + } +#endif + } + + dr_mark_memory(-1, bd, tcache_id, nolit); + add_to_block_list(&inactive_blocks[tcache_id], bd); + } + bd->active = 0; + + if (free) { +#if LINK_BRANCHES + // revoke outgoing links + for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) { + if (bl->target) + dr_block_unlink(bl, 0); + else + rm_from_hashlist_unresolved(bl, tcache_id); + bl->jump = NULL; + bl->next = blink_free[bl->tcache_id]; + blink_free[bl->tcache_id] = bl; + } + bd->entryp[0].o_links = NULL; +#endif + // invalidate block + rm_from_block_lists(bd); + bd->addr = bd->size = bd->addr_lit = 
bd->size_lit = 0; + bd->entry_count = 0; + } + emith_update_cache(); } -static struct block_desc *dr_add_block(u32 addr, u16 size_lit, - u16 size_nolit, int is_slave, int *blk_id) +static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc, + u32 addr, int size, u32 addr_lit, int size_lit) +{ + struct block_list **head = &inactive_blocks[tcache_id]; + struct block_list *current; + + for (current = *head; current != NULL; current = current->next) { + struct block_desc *block = current->block; + if (block->crc == crc && block->addr == addr && block->size == size && + block->addr_lit == addr_lit && block->size_lit == size_lit) + { + rm_from_block_lists(block); + return block; + } + } + return NULL; +} + +static struct block_desc *dr_add_block(int entries, u32 addr, int size, + u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id) { struct block_entry *be; struct block_desc *bd; int tcache_id; - int *bcount; // do a lookup to get tcache_id and override check be = dr_get_entry(addr, is_slave, &tcache_id); if (be != NULL) dbg(1, "block override for %08x", addr); - bcount = &block_counts[tcache_id]; - if (*bcount >= block_max_counts[tcache_id]) { + if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size || + entry_ring[tcache_id].used + entries > entry_ring[tcache_id].size) { dbg(1, "bd overflow for tcache %d", tcache_id); return NULL; } - bd = &block_tables[tcache_id][*bcount]; - bd->addr = addr; - bd->size = size_lit; - bd->size_nolit = size_nolit; + *blk_id = block_ring[tcache_id].next; + bd = ring_alloc(&block_ring[tcache_id], 1); + bd->entryp = ring_alloc(&entry_ring[tcache_id], entries); - bd->entry_count = 1; - bd->entryp[0].pc = addr; - bd->entryp[0].tcache_ptr = tcache_ptr; - bd->entryp[0].links = NULL; + bd->addr = addr; + bd->size = size; + bd->addr_lit = addr_lit; + bd->size_lit = size_lit; + bd->tcache_ptr = tcache_ptr; + bd->crc = crc; + bd->active = 0; + bd->list = NULL; + bd->entry_count = 0; #if (DRC_DEBUG & 2) - 
bd->entryp[0].block = bd; bd->refcount = 0; #endif - add_to_hashlist(&bd->entryp[0], tcache_id); - - *blk_id = *bcount; - (*bcount)++; return bd; } -static void REGPARM(3) *dr_lookup_block(u32 pc, int is_slave, int *tcache_id) +static void dr_link_blocks(struct block_entry *be, int tcache_id) +{ +#if LINK_BRANCHES + u32 tcmask = HASH_TABLE_SIZE(tcache_id) - 1; + u32 pc = be->pc; + struct block_link **head = &HASH_FUNC(unresolved_links[tcache_id], pc, tcmask); + struct block_link *bl = *head, *next; + + while (bl != NULL) { + next = bl->next; + if (bl->target_pc == pc && (!bl->tcache_id || bl->tcache_id == tcache_id)) { + rm_from_hashlist_unresolved(bl, bl->tcache_id); + dr_block_link(be, bl, 1); + } + bl = next; + } +#endif +} + +static void dr_link_outgoing(struct block_entry *be, int tcache_id, int is_slave) +{ +#if LINK_BRANCHES + struct block_link *bl; + int target_tcache_id; + + for (bl = be->o_links; bl; bl = bl->o_next) { + if (bl->target == NULL) { + be = dr_get_entry(bl->target_pc, is_slave, &target_tcache_id); + if (be != NULL && (!target_tcache_id || target_tcache_id == tcache_id)) { + // remove bl from unresolved_links (must've been since target was NULL) + rm_from_hashlist_unresolved(bl, bl->tcache_id); + dr_block_link(be, bl, 1); + } + } + } +#endif +} + +static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave) +{ + int i; + + // connect branches + for (i = 0; i < bd->entry_count; i++) { + struct block_entry *entry = &bd->entryp[i]; + add_to_hashlist(entry, tcache_id); + // incoming branches + dr_link_blocks(entry, tcache_id); + if (!tcache_id) + dr_link_blocks(entry, is_slave?2:1); + // outgoing branches + dr_link_outgoing(entry, tcache_id, is_slave); + } + + // mark memory for overwrite detection + dr_mark_memory(1, bd, tcache_id, 0); + bd->active = 1; +} + +static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) { struct block_entry *be = NULL; void *block = NULL; - be = dr_get_entry(pc, is_slave, 
tcache_id); + be = dr_get_entry(pc, sh2->is_slave, tcache_id); if (be != NULL) block = be->tcache_ptr; @@ -570,107 +1201,136 @@ static void REGPARM(3) *dr_lookup_block(u32 pc, int is_slave, int *tcache_id) return block; } +static void dr_free_oldest_block(int tcache_id) +{ + struct block_desc *bf; + + bf = ring_first(&block_ring[tcache_id]); + if (bf->addr && bf->entry_count) + dr_rm_block_entry(bf, tcache_id, 0, 1); + ring_free(&block_ring[tcache_id], 1); + + if (block_ring[tcache_id].used) { + bf = ring_first(&block_ring[tcache_id]); + ring_free_p(&entry_ring[tcache_id], bf->entryp); + ring_free_p(&tcache_ring[tcache_id], bf->tcache_ptr); + } else { + // reset since size of code block isn't known if no successor block exists + ring_reset(&block_ring[tcache_id]); + ring_reset(&entry_ring[tcache_id]); + ring_reset(&tcache_ring[tcache_id]); + } +} + +static inline void dr_reserve_cache(int tcache_id, struct ring_buffer *rb, int count) +{ + // while not enough space available + if (rb->next + count >= rb->size){ + // not enough space in rest of buffer -> wrap around + while (rb->first >= rb->next && rb->used) + dr_free_oldest_block(tcache_id); + if (rb->first == 0 && rb->used) + dr_free_oldest_block(tcache_id); + ring_wrap(rb); + } + while (rb->first >= rb->next && rb->next + count > rb->first && rb->used) + dr_free_oldest_block(tcache_id); +} + +static u8 *dr_prepare_cache(int tcache_id, int insn_count, int entry_count) +{ + int bf = block_ring[tcache_id].first; + + // reserve one block desc + if (block_ring[tcache_id].used >= block_ring[tcache_id].size) + dr_free_oldest_block(tcache_id); + // reserve block entries + dr_reserve_cache(tcache_id, &entry_ring[tcache_id], entry_count); + // reserve cache space + dr_reserve_cache(tcache_id, &tcache_ring[tcache_id], insn_count*128); + + if (bf != block_ring[tcache_id].first) { + // deleted some block(s), clear branch cache and return stack +#if BRANCH_CACHE + if (tcache_id) + memset32(sh2s[tcache_id-1].branch_cache, -1, 
sizeof(sh2s[0].branch_cache)/4); + else { + memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); + } +#endif +#if CALL_STACK + if (tcache_id) { + memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + sh2s[tcache_id-1].rts_cache_idx = 0; + } else { + memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } +#endif + } + + return ring_next(&tcache_ring[tcache_id]); +} + +static void dr_flush_tcache(int tcid) +{ + int i; +#if (DRC_DEBUG & 1) + elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d bes %d/%d)", tcid, + tcache_ring[tcid].used, tcache_ring[tcid].size, block_ring[tcid].used, + block_ring[tcid].size, entry_ring[tcid].used, entry_ring[tcid].size); +#endif + + ring_reset(&tcache_ring[tcid]); + ring_reset(&block_ring[tcid]); + ring_reset(&entry_ring[tcid]); + + block_link_pool_counts[tcid] = 0; + blink_free[tcid] = NULL; + memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid)); + memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid)); + + if (tcid == 0) { // ROM, RAM + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); + memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } else { + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); + memset(Pico32xMem->drclit_da[tcid - 1], 
0, sizeof(Pico32xMem->drclit_da[tcid - 1])); + memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + sh2s[tcid - 1].rts_cache_idx = 0; + } +#if (DRC_DEBUG & 4) + tcache_dsm_ptrs[tcid] = tcache_ring[tcid].base; +#endif + + for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++) + discard_block_list(&inval_lookup[tcid][i]); + discard_block_list(&inactive_blocks[tcid]); +} + static void *dr_failure(void) { - lprintf("recompilation failed\n"); + printf("recompilation failed\n"); exit(1); } -static void *dr_prepare_ext_branch(u32 pc, int is_slave, int tcache_id) -{ -#if LINK_BRANCHES - struct block_link *bl = block_link_pool[tcache_id]; - int cnt = block_link_pool_counts[tcache_id]; - struct block_entry *be = NULL; - int target_tcache_id; - int i; - - be = dr_get_entry(pc, is_slave, &target_tcache_id); - if (target_tcache_id != tcache_id) - return sh2_drc_dispatcher; - - // if pool has been freed, reuse - for (i = cnt - 1; i >= 0; i--) - if (bl[i].target_pc != 0) - break; - cnt = i + 1; - if (cnt >= block_link_pool_max_counts[tcache_id]) { - dbg(1, "bl overflow for tcache %d", tcache_id); - return NULL; - } - bl += cnt; - block_link_pool_counts[tcache_id]++; - - bl->target_pc = pc; - bl->jump = tcache_ptr; - - if (be != NULL) { - dbg(2, "- early link from %p to pc %08x", bl->jump, pc); - bl->next = be->links; - be->links = bl; - return be->tcache_ptr; - } - else { - bl->next = unresolved_links[tcache_id]; - unresolved_links[tcache_id] = bl; - return sh2_drc_dispatcher; - } -#else - return sh2_drc_dispatcher; -#endif -} - -static void dr_link_blocks(struct block_entry *be, int tcache_id) -{ -#if LINK_BRANCHES - struct block_link *first = unresolved_links[tcache_id]; - struct block_link *bl, *prev, *tmp; - u32 pc = be->pc; - - for (bl = prev = first; bl != NULL; ) { - if (bl->target_pc == pc) { - dbg(2, "- link from %p to pc %08x", bl->jump, pc); - emith_jump_patch(bl->jump, tcache_ptr); 
- - // move bl from unresolved_links to block_entry - tmp = bl->next; - bl->next = be->links; - be->links = bl; - - if (bl == first) - first = prev = bl = tmp; - else - prev->next = bl = tmp; - continue; - } - prev = bl; - bl = bl->next; - } - unresolved_links[tcache_id] = first; - - // could sync arm caches here, but that's unnecessary -#endif -} - -#define ADD_TO_ARRAY(array, count, item, failcode) \ - if (count >= ARRAY_SIZE(array)) { \ - dbg(1, "warning: " #array " overflow"); \ - failcode; \ - } \ - array[count++] = item; - -static int find_in_array(u32 *array, size_t size, u32 what) -{ - size_t i; - for (i = 0; i < size; i++) - if (what == array[i]) - return i; - - return -1; -} - // --------------------------------------------------------------- +// NB rcache allocation dependencies: +// - get_reg_arg/get_tmp_arg first (might evict other regs just allocated) +// - get_reg(..., NULL) before get_reg(..., &hr) if it might get the same reg +// - get_reg(..., RC_GR_READ/RMW, ...) before WRITE (might evict needed reg) + // register cache / constant propagation stuff typedef enum { RC_GR_READ, @@ -678,380 +1338,1177 @@ typedef enum { RC_GR_RMW, } rc_gr_mode; -static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking); +typedef struct { + u32 gregs; + u32 val; +} gconst_t; -// guest regs with constants -static u32 dr_gcregs[24]; -// a mask of constant/dirty regs -static u32 dr_gcregs_mask; -static u32 dr_gcregs_dirty; +gconst_t gconsts[ARRAY_SIZE(guest_regs)]; -#if PROPAGATE_CONSTANTS -static void gconst_new(sh2_reg_e r, u32 val) +static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking, int *hr); +static inline int rcache_is_cached(sh2_reg_e r); +static void rcache_add_vreg_alias(int x, sh2_reg_e r); +static void rcache_remove_vreg_alias(int x, sh2_reg_e r); +static void rcache_evict_vreg(int x); +static void rcache_remap_vreg(int x); +static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode, int *hr); + +static void rcache_set_x16(int 
hr, int s16_, int u16_) { - int i; - - dr_gcregs_mask |= 1 << r; - dr_gcregs_dirty |= 1 << r; - dr_gcregs[r] = val; - - // throw away old r that we might have cached - for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) { - if ((reg_temp[i].type == HR_CACHED) && - reg_temp[i].greg == r) { - reg_temp[i].type = HR_FREE; - reg_temp[i].flags = 0; - } + int x = reg_map_host[hr]; + if (x >= 0) { + cache_regs[x].flags &= ~(HRF_S16|HRF_U16); + if (s16_) cache_regs[x].flags |= HRF_S16; + if (u16_) cache_regs[x].flags |= HRF_U16; } } -#endif + +static void rcache_copy_x16(int hr, int hr2) +{ + int x = reg_map_host[hr], y = reg_map_host[hr2]; + if (x >= 0 && y >= 0) { + cache_regs[x].flags = (cache_regs[x].flags & ~(HRF_S16|HRF_U16)) | + (cache_regs[y].flags & (HRF_S16|HRF_U16)); + } +} + +static int rcache_is_s16(int hr) +{ + int x = reg_map_host[hr]; + return (x >= 0 ? cache_regs[x].flags & HRF_S16 : 0); +} + +static int rcache_is_u16(int hr) +{ + int x = reg_map_host[hr]; + return (x >= 0 ? cache_regs[x].flags & HRF_U16 : 0); +} + +#define RCACHE_DUMP(msg) { \ + cache_reg_t *cp; \ + guest_reg_t *gp; \ + int i; \ + printf("cache dump %s:\n",msg); \ + printf(" cache_regs:\n"); \ + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \ + cp = &cache_regs[i]; \ + if (cp->type != HR_FREE || cp->gregs || cp->locked || cp->flags) \ + printf(" %d: hr=%d t=%d f=%x c=%d m=%lx\n", i, cp->hreg, cp->type, cp->flags, cp->locked, (ulong)cp->gregs); \ + } \ + printf(" guest_regs:\n"); \ + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { \ + gp = &guest_regs[i]; \ + if (gp->vreg != -1 || gp->sreg >= 0 || gp->flags) \ + printf(" %d: v=%d f=%x s=%d c=%d\n", i, gp->vreg, gp->flags, gp->sreg, gp->cnst); \ + } \ + printf(" gconsts:\n"); \ + for (i = 0; i < ARRAY_SIZE(gconsts); i++) { \ + if (gconsts[i].gregs) \ + printf(" %d: m=%lx v=%lx\n", i, (ulong)gconsts[i].gregs, (ulong)gconsts[i].val); \ + } \ +} + +#define RCACHE_CHECK(msg) { \ + cache_reg_t *cp; \ + guest_reg_t *gp; \ + int i, x, m = 0, d = 0; \ + 
for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \ + cp = &cache_regs[i]; \ + if (cp->flags & HRF_PINNED) m |= (1 << i); \ + if (cp->type == HR_FREE || cp->type == HR_TEMP) continue; \ + /* check connectivity greg->vreg */ \ + FOR_ALL_BITS_SET_DO(cp->gregs, x, \ + if (guest_regs[x].vreg != i) \ + { d = 1; printf("cache check v=%d r=%d not connected?\n",i,x); } \ + ) \ + } \ + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { \ + gp = &guest_regs[i]; \ + if (gp->vreg != -1 && !(cache_regs[gp->vreg].gregs & (1 << i))) \ + { d = 1; printf("cache check r=%d v=%d not connected?\n", i, gp->vreg); }\ + if (gp->vreg != -1 && cache_regs[gp->vreg].type != HR_CACHED) \ + { d = 1; printf("cache check r=%d v=%d wrong type?\n", i, gp->vreg); }\ + if ((gp->flags & GRF_CONST) && !(gconsts[gp->cnst].gregs & (1 << i))) \ + { d = 1; printf("cache check r=%d c=%d not connected?\n", i, gp->cnst); }\ + if ((gp->flags & GRF_CDIRTY) && (gp->vreg != -1 || !(gp->flags & GRF_CONST)))\ + { d = 1; printf("cache check r=%d CDIRTY?\n", i); } \ + if (gp->flags & (GRF_STATIC|GRF_PINNED)) { \ + if (gp->sreg == -1 || !(cache_regs[gp->sreg].flags & HRF_PINNED))\ + { d = 1; printf("cache check r=%d v=%d not pinned?\n", i, gp->vreg); } \ + else m &= ~(1 << gp->sreg); \ + } \ + } \ + for (i = 0; i < ARRAY_SIZE(gconsts); i++) { \ + FOR_ALL_BITS_SET_DO(gconsts[i].gregs, x, \ + if (guest_regs[x].cnst != i || !(guest_regs[x].flags & GRF_CONST)) \ + { d = 1; printf("cache check c=%d v=%d not connected?\n",i,x); } \ + ) \ + } \ + if (m) \ + { d = 1; printf("cache check m=%x pinning wrong?\n",m); } \ + if (d) RCACHE_DUMP(msg) \ +/* else { \ + printf("locked regs %s:\n",msg); \ + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { \ + cp = &cache_regs[i]; \ + if (cp->locked) \ + printf(" %d: hr=%d t=%d f=%x c=%d m=%x\n", i, cp->hreg, cp->type, cp->flags, cp->locked, cp->gregs); \ + } \ + } */ \ +} + +static inline int gconst_alloc(sh2_reg_e r) +{ + int i, n = -1; + + for (i = 0; i < ARRAY_SIZE(gconsts); i++) { + 
gconsts[i].gregs &= ~(1 << r); + if (gconsts[i].gregs == 0 && n < 0) + n = i; + } + if (n >= 0) + gconsts[n].gregs = (1 << r); + else { + printf("all gconst buffers in use, aborting\n"); + exit(1); // cannot happen - more constants than guest regs? + } + return n; +} + +static void gconst_set(sh2_reg_e r, u32 val) +{ + int i = gconst_alloc(r); + + guest_regs[r].flags |= GRF_CONST; + guest_regs[r].cnst = i; + gconsts[i].val = val; +} + +static void gconst_new(sh2_reg_e r, u32 val) +{ + gconst_set(r, val); + guest_regs[r].flags |= GRF_CDIRTY; + + // throw away old r that we might have cached + if (guest_regs[r].vreg >= 0) + rcache_remove_vreg_alias(guest_regs[r].vreg, r); +} static int gconst_get(sh2_reg_e r, u32 *val) { - if (dr_gcregs_mask & (1 << r)) { - *val = dr_gcregs[r]; + if (guest_regs[r].flags & GRF_CONST) { + *val = gconsts[guest_regs[r].cnst].val; return 1; } + *val = 0; return 0; } static int gconst_check(sh2_reg_e r) { - if ((dr_gcregs_mask | dr_gcregs_dirty) & (1 << r)) + if (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) return 1; return 0; } // update hr if dirty, else do nothing -static int gconst_try_read(int hr, sh2_reg_e r) +static int gconst_try_read(int vreg, sh2_reg_e r) { - if (dr_gcregs_dirty & (1 << r)) { - emith_move_r_imm(hr, dr_gcregs[r]); - dr_gcregs_dirty &= ~(1 << r); + int i, x; + u32 v; + + if (guest_regs[r].flags & GRF_CDIRTY) { + x = guest_regs[r].cnst; + v = gconsts[x].val; + emith_move_r_imm(cache_regs[vreg].hreg, v); + rcache_set_x16(cache_regs[vreg].hreg, v == (s16)v, v == (u16)v); + FOR_ALL_BITS_SET_DO(gconsts[x].gregs, i, + { + if (guest_regs[i].vreg >= 0 && guest_regs[i].vreg != vreg) + rcache_remove_vreg_alias(guest_regs[i].vreg, i); + if (guest_regs[i].vreg < 0) + rcache_add_vreg_alias(vreg, i); + guest_regs[i].flags &= ~GRF_CDIRTY; + guest_regs[i].flags |= GRF_DIRTY; + }); + cache_regs[vreg].type = HR_CACHED; + cache_regs[vreg].flags |= HRF_DIRTY; return 1; } return 0; } -static void gconst_check_evict(sh2_reg_e r) 
+static u32 gconst_dirty_mask(void) { - if (dr_gcregs_mask & (1 << r)) - // no longer cached in reg, make dirty again - dr_gcregs_dirty |= 1 << r; + u32 mask = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].flags & GRF_CDIRTY) + mask |= (1 << i); + return mask; } static void gconst_kill(sh2_reg_e r) { - dr_gcregs_mask &= ~(1 << r); - dr_gcregs_dirty &= ~(1 << r); + if (guest_regs[r].flags & (GRF_CONST|GRF_CDIRTY)) + gconsts[guest_regs[r].cnst].gregs &= ~(1 << r); + guest_regs[r].flags &= ~(GRF_CONST|GRF_CDIRTY); +} + +static void gconst_copy(sh2_reg_e rd, sh2_reg_e rs) +{ + gconst_kill(rd); + if (guest_regs[rs].flags & GRF_CONST) { + guest_regs[rd].flags |= GRF_CONST; + if (guest_regs[rd].vreg < 0) + guest_regs[rd].flags |= GRF_CDIRTY; + guest_regs[rd].cnst = guest_regs[rs].cnst; + gconsts[guest_regs[rd].cnst].gregs |= (1 << rd); + } } static void gconst_clean(void) { int i; - for (i = 0; i < ARRAY_SIZE(dr_gcregs); i++) - if (dr_gcregs_dirty & (1 << i)) { + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].flags & GRF_CDIRTY) { // using RC_GR_READ here: it will call gconst_try_read, // cache the reg and mark it dirty. 
- rcache_get_reg_(i, RC_GR_READ, 0); + rcache_get_reg_(i, RC_GR_READ, 0, NULL); } } static void gconst_invalidate(void) { - dr_gcregs_mask = dr_gcregs_dirty = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + if (guest_regs[i].flags & (GRF_CONST|GRF_CDIRTY)) + gconsts[guest_regs[i].cnst].gregs &= ~(1 << i); + guest_regs[i].flags &= ~(GRF_CONST|GRF_CDIRTY); + } } -static u16 rcache_counter; -static temp_reg_t *rcache_evict(void) +static u16 rcache_counter; +// SH2 register usage bitmasks +static u32 rcache_vregs_reg; // regs of type HRT_REG (for pinning) +static u32 rcache_regs_static; // statically allocated regs +static u32 rcache_regs_pinned; // pinned regs +static u32 rcache_regs_now; // regs used in current insn +static u32 rcache_regs_soon; // regs used in the next few insns +static u32 rcache_regs_late; // regs used in later insns +static u32 rcache_regs_discard; // regs overwritten without being used +static u32 rcache_regs_clean; // regs needing cleaning + +static void rcache_lock_vreg(int x) { - // evict reg with oldest stamp - int i, oldest = -1; + if (x >= 0) { + cache_regs[x].locked ++; +#if DRC_DEBUG & 64 + if (cache_regs[x].type == HR_FREE) { + printf("locking free vreg %x, aborting\n", x); + exit(1); + } + if (!cache_regs[x].locked) { + printf("locking overflow vreg %x, aborting\n", x); + exit(1); + } +#endif + } +} + +static void rcache_unlock_vreg(int x) +{ + if (x >= 0) { +#if DRC_DEBUG & 64 + if (cache_regs[x].type == HR_FREE) { + printf("unlocking free vreg %x, aborting\n", x); + exit(1); + } +#endif + if (cache_regs[x].locked) + cache_regs[x].locked --; + } +} + +static void rcache_free_vreg(int x) +{ + cache_regs[x].type = cache_regs[x].locked ? 
HR_TEMP : HR_FREE; + cache_regs[x].flags &= HRF_PINNED; + cache_regs[x].gregs = 0; +} + +static void rcache_unmap_vreg(int x) +{ + int i; + + FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, i, + if (guest_regs[i].flags & GRF_DIRTY) { + // if a dirty reg is unmapped save its value to context + if ((~rcache_regs_discard | rcache_regs_now) & (1 << i)) + emith_ctx_write(cache_regs[x].hreg, i * 4); + guest_regs[i].flags &= ~GRF_DIRTY; + } + guest_regs[i].vreg = -1); + rcache_free_vreg(x); +} + +static void rcache_move_vreg(int d, int x) +{ + int i; + + cache_regs[d].type = HR_CACHED; + cache_regs[d].gregs = cache_regs[x].gregs; + cache_regs[d].flags &= HRF_PINNED; + cache_regs[d].flags |= cache_regs[x].flags & ~HRF_PINNED; + cache_regs[d].locked = 0; + cache_regs[d].stamp = cache_regs[x].stamp; + emith_move_r_r(cache_regs[d].hreg, cache_regs[x].hreg); + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].vreg == x) + guest_regs[i].vreg = d; + rcache_free_vreg(x); +} + +static void rcache_clean_vreg(int x) +{ + u32 rns = rcache_regs_now | rcache_regs_soon; + int r; + + if (cache_regs[x].flags & HRF_DIRTY) { // writeback + cache_regs[x].flags &= ~HRF_DIRTY; + rcache_lock_vreg(x); + FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, r, + if (guest_regs[r].flags & GRF_DIRTY) { + if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) { + if (guest_regs[r].vreg != guest_regs[r].sreg && + !cache_regs[guest_regs[r].sreg].locked && + ((~rcache_regs_discard | rcache_regs_now) & (1 << r)) && + !(rns & cache_regs[guest_regs[r].sreg].gregs)) { + // statically mapped reg not in its sreg. move back to sreg + rcache_evict_vreg(guest_regs[r].sreg); + emith_move_r_r(cache_regs[guest_regs[r].sreg].hreg, + cache_regs[guest_regs[r].vreg].hreg); + rcache_copy_x16(cache_regs[guest_regs[r].sreg].hreg, + cache_regs[guest_regs[r].vreg].hreg); + rcache_remove_vreg_alias(x, r); + rcache_add_vreg_alias(guest_regs[r].sreg, r); + cache_regs[guest_regs[r].sreg].flags |= HRF_DIRTY; + } else + // cannot remap. 
keep dirty for writeback in unmap + cache_regs[x].flags |= HRF_DIRTY; + } else { + if ((~rcache_regs_discard | rcache_regs_now) & (1 << r)) + emith_ctx_write(cache_regs[x].hreg, r * 4); + guest_regs[r].flags &= ~GRF_DIRTY; + } + rcache_regs_clean &= ~(1 << r); + }) + rcache_unlock_vreg(x); + } + +#if DRC_DEBUG & 64 + RCACHE_CHECK("after clean"); +#endif +} + +static void rcache_add_vreg_alias(int x, sh2_reg_e r) +{ + cache_regs[x].gregs |= (1 << r); + guest_regs[r].vreg = x; + cache_regs[x].type = HR_CACHED; +} + +static void rcache_remove_vreg_alias(int x, sh2_reg_e r) +{ + cache_regs[x].gregs &= ~(1 << r); + if (!cache_regs[x].gregs) { + // no reg mapped -> free vreg + if (cache_regs[x].locked) + cache_regs[x].type = HR_TEMP; + else + rcache_free_vreg(x); + } + guest_regs[r].vreg = -1; +} + +static void rcache_evict_vreg(int x) +{ + rcache_remap_vreg(x); + rcache_unmap_vreg(x); +} + +static void rcache_evict_vreg_aliases(int x, sh2_reg_e r) +{ + rcache_remove_vreg_alias(x, r); + rcache_evict_vreg(x); + rcache_add_vreg_alias(x, r); +} + +static int rcache_allocate(int what, int minprio) +{ + // evict reg with oldest stamp (only for HRT_REG, no temps) + int i, i_prio, oldest = -1, prio = 0; u16 min_stamp = (u16)-1; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) { - if (reg_temp[i].type == HR_CACHED && !(reg_temp[i].flags & HRF_LOCKED) && - reg_temp[i].stamp <= min_stamp) { - min_stamp = reg_temp[i].stamp; + for (i = ARRAY_SIZE(cache_regs)-1; i >= 0; i--) { + // consider only non-static, unpinned, unlocked REG or TEMP + if ((cache_regs[i].flags & HRF_PINNED) || cache_regs[i].locked) + continue; + if ((what > 0 && !(cache_regs[i].htype & HRT_REG)) || // get a REG + (what == 0 && (cache_regs[i].htype & HRT_TEMP)) || // get a non-TEMP + (what < 0 && !(cache_regs[i].htype & HRT_TEMP))) // get a TEMP + continue; + if (cache_regs[i].type == HR_FREE || cache_regs[i].type == HR_TEMP) { + // REG is free + prio = 10; oldest = i; + break; + } + if (cache_regs[i].type == 
HR_CACHED) { + if (rcache_regs_now & cache_regs[i].gregs) + // REGs needed for the current insn + i_prio = 0; + else if (rcache_regs_soon & cache_regs[i].gregs) + // REGs needed in the next insns + i_prio = 2; + else if (rcache_regs_late & cache_regs[i].gregs) + // REGs needed in some future insn + i_prio = 4; + else if (~rcache_regs_discard & cache_regs[i].gregs) + // REGs not needed in the foreseeable future + i_prio = 6; + else + // REGs soon overwritten anyway + i_prio = 8; + if (!(cache_regs[i].flags & HRF_DIRTY)) i_prio ++; + + if (prio < i_prio || (prio == i_prio && cache_regs[i].stamp < min_stamp)) { + min_stamp = cache_regs[i].stamp; + oldest = i; + prio = i_prio; + } } } - if (oldest == -1) { - printf("no registers to evict, aborting\n"); + + if (prio < minprio || oldest == -1) + return -1; + + if (cache_regs[oldest].type == HR_CACHED) + rcache_evict_vreg(oldest); + else + rcache_free_vreg(oldest); + + return oldest; +} + +static int rcache_allocate_vreg(int needed) +{ + int x; + + x = rcache_allocate(1, needed ? 
0 : 4); + if (x < 0) + x = rcache_allocate(-1, 0); + return x; +} + +static int rcache_allocate_nontemp(void) +{ + int x = rcache_allocate(0, 4); + return x; +} + +static int rcache_allocate_temp(void) +{ + int x = rcache_allocate(-1, 0); + if (x < 0) + x = rcache_allocate(0, 0); + return x; +} + +// maps a host register to a REG +static int rcache_map_reg(sh2_reg_e r, int hr) +{ +#if REMAP_REGISTER + int i; + + gconst_kill(r); + + // lookup the TEMP hr maps to + i = reg_map_host[hr]; + if (i < 0) { + // must not happen + printf("invalid host register %d\n", hr); exit(1); } - i = oldest; - if (reg_temp[i].type == HR_CACHED) { - if (reg_temp[i].flags & HRF_DIRTY) - // writeback - emith_ctx_write(reg_temp[i].hreg, reg_temp[i].greg * 4); - gconst_check_evict(reg_temp[i].greg); - } - - reg_temp[i].type = HR_FREE; - reg_temp[i].flags = 0; - return ®_temp[i]; + // remove old mappings of r and i if one exists + if (guest_regs[r].vreg >= 0) + rcache_remove_vreg_alias(guest_regs[r].vreg, r); + if (cache_regs[i].type == HR_CACHED) + rcache_evict_vreg(i); + // set new mappping + cache_regs[i].type = HR_CACHED; + cache_regs[i].gregs = 1 << r; + cache_regs[i].locked = 0; + cache_regs[i].stamp = ++rcache_counter; + cache_regs[i].flags |= HRF_DIRTY; + rcache_lock_vreg(i); + guest_regs[r].flags |= GRF_DIRTY; + guest_regs[r].vreg = i; +#if DRC_DEBUG & 64 + RCACHE_CHECK("after map"); +#endif + return cache_regs[i].hreg; +#else + return rcache_get_reg(r, RC_GR_WRITE, NULL); +#endif } -static int get_reg_static(sh2_reg_e r, rc_gr_mode mode) +// remap vreg from a TEMP to a REG if it will be used (upcoming TEMP invalidation) +static void rcache_remap_vreg(int x) { - int i = reg_map_g2h[r]; - if (i != -1) { - if (mode != RC_GR_WRITE) - gconst_try_read(i, r); +#if REMAP_REGISTER + u32 rsl_d = rcache_regs_soon | rcache_regs_late; + int d; + + // x must be a cached vreg + if (cache_regs[x].type != HR_CACHED || cache_regs[x].locked) + return; + // don't do it if x isn't used + if (!(rsl_d & 
cache_regs[x].gregs)) { + // clean here to avoid data loss on invalidation + rcache_clean_vreg(x); + return; } - return i; + + FOR_ALL_BITS_SET_DO(cache_regs[x].gregs, d, + if ((guest_regs[d].flags & (GRF_STATIC|GRF_PINNED)) && + !cache_regs[guest_regs[d].sreg].locked && + !((rsl_d|rcache_regs_now) & cache_regs[guest_regs[d].sreg].gregs)) { + // STATIC not in its sreg and sreg is available + rcache_evict_vreg(guest_regs[d].sreg); + rcache_move_vreg(guest_regs[d].sreg, x); + return; + } + ) + + // allocate a non-TEMP vreg + rcache_lock_vreg(x); // lock to avoid evicting x + d = rcache_allocate_nontemp(); + rcache_unlock_vreg(x); + if (d < 0) { + rcache_clean_vreg(x); + return; + } + + // move vreg to new location + rcache_move_vreg(d, x); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after remap"); +#endif +#else + rcache_clean_vreg(x); +#endif +} + +static void rcache_alias_vreg(sh2_reg_e rd, sh2_reg_e rs) +{ +#if ALIAS_REGISTERS + int x; + + // if s isn't constant, it must be in cache for aliasing + if (!gconst_check(rs)) + rcache_get_reg_(rs, RC_GR_READ, 0, NULL); + + // if d and s are not already aliased + x = guest_regs[rs].vreg; + if (guest_regs[rd].vreg != x) { + // remove possible old mapping of dst + if (guest_regs[rd].vreg >= 0) + rcache_remove_vreg_alias(guest_regs[rd].vreg, rd); + // make dst an alias of src + if (x >= 0) + rcache_add_vreg_alias(x, rd); + // if d is now in cache, it must be dirty + if (guest_regs[rd].vreg >= 0) { + x = guest_regs[rd].vreg; + cache_regs[x].flags |= HRF_DIRTY; + guest_regs[rd].flags |= GRF_DIRTY; + } + } + + gconst_copy(rd, rs); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after alias"); +#endif +#else + int hr_s = rcache_get_reg(rs, RC_GR_READ, NULL); + int hr_d = rcache_get_reg(rd, RC_GR_WRITE, NULL); + + emith_move_r_r(hr_d, hr_s); + gconst_copy(rd, rs); +#endif } // note: must not be called when doing conditional code -static int rcache_get_reg_(sh2_reg_e r, rc_gr_mode mode, int do_locking) +static int rcache_get_reg_(sh2_reg_e r, 
rc_gr_mode mode, int do_locking, int *hr) { - temp_reg_t *tr; - int i, ret; + int src, dst, ali; + cache_reg_t *tr; + u32 rsp_d = (rcache_regs_soon | rcache_regs_static | rcache_regs_pinned) & + ~rcache_regs_discard; - // maybe statically mapped? - ret = get_reg_static(r, mode); - if (ret != -1) - goto end; + dst = src = guest_regs[r].vreg; - rcache_counter++; - - // maybe already cached? - // if so, prefer against gconst (they must be in sync) - for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) { - if (reg_temp[i].type == HR_CACHED && reg_temp[i].greg == r) { - reg_temp[i].stamp = rcache_counter; - if (mode != RC_GR_READ) - reg_temp[i].flags |= HRF_DIRTY; - ret = reg_temp[i].hreg; - goto end; + rcache_lock_vreg(src); // lock to avoid evicting src + // good opportunity to relocate a remapped STATIC? + if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && + src != guest_regs[r].sreg && (src < 0 || mode != RC_GR_READ) && + !cache_regs[guest_regs[r].sreg].locked && + !((rsp_d|rcache_regs_now) & cache_regs[guest_regs[r].sreg].gregs)) { + dst = guest_regs[r].sreg; + rcache_evict_vreg(dst); + } else if (dst < 0) { + // allocate a cache register + if ((dst = rcache_allocate_vreg(rsp_d & (1 << r))) < 0) { + printf("no registers to evict, aborting\n"); + exit(1); } } - - // use any free reg - for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) { - if (reg_temp[i].type == HR_FREE) { - tr = ®_temp[i]; - goto do_alloc; - } - } - - tr = rcache_evict(); - -do_alloc: - tr->type = HR_CACHED; - if (do_locking) - tr->flags |= HRF_LOCKED; - if (mode != RC_GR_READ) - tr->flags |= HRF_DIRTY; - tr->greg = r; + tr = &cache_regs[dst]; tr->stamp = rcache_counter; - ret = tr->hreg; + // remove r from src + if (src >= 0 && src != dst) + rcache_remove_vreg_alias(src, r); + rcache_unlock_vreg(src); - if (mode != RC_GR_WRITE) { - if (gconst_check(r)) { - if (gconst_try_read(ret, r)) - tr->flags |= HRF_DIRTY; + // if r has a constant it may have aliases + if (mode != RC_GR_WRITE && 
gconst_try_read(dst, r)) + src = dst; + + // if r will be modified, check for aliases being needed rsn + ali = tr->gregs & ~(1 << r); + if (mode != RC_GR_READ && src == dst && ali) { + int x = -1; + if ((rsp_d|rcache_regs_now) & ali) { + if ((guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) && + guest_regs[r].sreg == dst && !tr->locked) { + // split aliases if r is STATIC in sreg and dst isn't already locked + int t; + FOR_ALL_BITS_SET_DO(ali, t, + if ((guest_regs[t].flags & (GRF_STATIC|GRF_PINNED)) && + !(ali & ~(1 << t)) && + !cache_regs[guest_regs[t].sreg].locked && + !((rsp_d|rcache_regs_now) & cache_regs[guest_regs[t].sreg].gregs)) { + // alias is a single STATIC and its sreg is available + x = guest_regs[t].sreg; + rcache_evict_vreg(x); + } else { + rcache_lock_vreg(dst); // lock to avoid evicting dst + x = rcache_allocate_vreg(rsp_d & ali); + rcache_unlock_vreg(dst); + } + break; + ) + if (x >= 0) { + rcache_remove_vreg_alias(src, r); + src = dst; + rcache_move_vreg(x, dst); + } + } else { + // split r + rcache_lock_vreg(src); // lock to avoid evicting src + x = rcache_allocate_vreg(rsp_d & (1 << r)); + rcache_unlock_vreg(src); + if (x >= 0) { + rcache_remove_vreg_alias(src, r); + dst = x; + tr = &cache_regs[dst]; + tr->stamp = rcache_counter; + } + } } - else - emith_ctx_read(tr->hreg, r * 4); + if (x < 0) + // aliases not needed or no vreg available, remove them + rcache_evict_vreg_aliases(dst, r); } -end: - if (mode != RC_GR_READ) - gconst_kill(r); + // assign r to dst + rcache_add_vreg_alias(dst, r); - return ret; + // handle dst register transfer + if (src < 0 && mode != RC_GR_WRITE) + emith_ctx_read(tr->hreg, r * 4); + if (hr) { + *hr = (src >= 0 ? cache_regs[src].hreg : tr->hreg); + rcache_lock_vreg(src >= 0 ? 
src : dst); + } else if (src >= 0 && mode != RC_GR_WRITE && cache_regs[src].hreg != tr->hreg) + emith_move_r_r(tr->hreg, cache_regs[src].hreg); + + // housekeeping + if (do_locking) + rcache_lock_vreg(dst); + if (mode != RC_GR_READ) { + tr->flags |= HRF_DIRTY; + guest_regs[r].flags |= GRF_DIRTY; + gconst_kill(r); + rcache_set_x16(tr->hreg, 0, 0); + } else if (src >= 0 && cache_regs[src].hreg != tr->hreg) + rcache_copy_x16(tr->hreg, cache_regs[src].hreg); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after getreg"); +#endif + return tr->hreg; } -static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode) +static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode, int *hr) { - return rcache_get_reg_(r, mode, 1); + return rcache_get_reg_(r, mode, 1, hr); +} + +static void rcache_pin_reg(sh2_reg_e r) +{ + int hr, x; + + // don't pin if static or already pinned + if (guest_regs[r].flags & (GRF_STATIC|GRF_PINNED)) + return; + + rcache_regs_soon |= (1 << r); // kludge to prevent allocation of a temp + hr = rcache_get_reg_(r, RC_GR_RMW, 0, NULL); + x = reg_map_host[hr]; + + // can only pin non-TEMPs + if (!(cache_regs[x].htype & HRT_TEMP)) { + guest_regs[r].flags |= GRF_PINNED; + cache_regs[x].flags |= HRF_PINNED; + guest_regs[r].sreg = x; + rcache_regs_pinned |= (1 << r); + } +#if DRC_DEBUG & 64 + RCACHE_CHECK("after pin"); +#endif } static int rcache_get_tmp(void) { - temp_reg_t *tr; int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].type == HR_FREE) { - tr = ®_temp[i]; - goto do_alloc; - } + i = rcache_allocate_temp(); + if (i < 0) { + printf("cannot allocate temp\n"); + exit(1); + } - tr = rcache_evict(); + cache_regs[i].type = HR_TEMP; + rcache_lock_vreg(i); -do_alloc: - tr->type = HR_TEMP; - return tr->hreg; + return cache_regs[i].hreg; } -static int rcache_get_arg_id(int arg) +static int rcache_get_vreg_hr(int hr) { - int i, r = 0; - host_arg2reg(r, arg); + int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].hreg == r) - break; - - if (i == 
ARRAY_SIZE(reg_temp)) // can't happen - exit(1); - - if (reg_temp[i].type == HR_CACHED) { - // writeback - if (reg_temp[i].flags & HRF_DIRTY) - emith_ctx_write(reg_temp[i].hreg, reg_temp[i].greg * 4); - gconst_check_evict(reg_temp[i].greg); - } - else if (reg_temp[i].type == HR_TEMP) { - printf("arg %d reg %d already used, aborting\n", arg, r); + i = reg_map_host[hr]; + if (i < 0 || cache_regs[i].locked) { + printf("host register %d is locked\n", hr); exit(1); } - reg_temp[i].type = HR_FREE; - reg_temp[i].flags = 0; + if (cache_regs[i].type == HR_CACHED) + rcache_evict_vreg(i); + else if (cache_regs[i].type == HR_TEMP && cache_regs[i].locked) { + printf("host reg %d already used, aborting\n", hr); + exit(1); + } return i; } +static int rcache_get_vreg_arg(int arg) +{ + int hr = 0; + + host_arg2reg(hr, arg); + return rcache_get_vreg_hr(hr); +} + // get a reg to be used as function arg static int rcache_get_tmp_arg(int arg) { - int id = rcache_get_arg_id(arg); - reg_temp[id].type = HR_TEMP; + int x = rcache_get_vreg_arg(arg); + cache_regs[x].type = HR_TEMP; + rcache_lock_vreg(x); - return reg_temp[id].hreg; + return cache_regs[x].hreg; } -// same but caches a reg. RC_GR_READ only. -static int rcache_get_reg_arg(int arg, sh2_reg_e r) +// ... as return value after a call +static int rcache_get_tmp_ret(void) { - int i, srcr, dstr, dstid; - int dirty = 0, src_dirty = 0; + int x = rcache_get_vreg_hr(RET_REG); + cache_regs[x].type = HR_TEMP; + rcache_lock_vreg(x); - dstid = rcache_get_arg_id(arg); - dstr = reg_temp[dstid].hreg; + return cache_regs[x].hreg; +} - // maybe already statically mapped? - srcr = get_reg_static(r, RC_GR_READ); - if (srcr != -1) - goto do_cache; +// same but caches a reg if access is readonly (announced by hr being NULL) +static int rcache_get_reg_arg(int arg, sh2_reg_e r, int *hr) +{ + int i, srcr, dstr, dstid, keep; + u32 val; + host_arg2reg(dstr, arg); - // maybe already cached? 
- for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) { - if ((reg_temp[i].type == HR_CACHED) && - reg_temp[i].greg == r) - { - srcr = reg_temp[i].hreg; - if (reg_temp[i].flags & HRF_DIRTY) - src_dirty = 1; - goto do_cache; - } - } - - // must read - srcr = dstr; - if (gconst_check(r)) { - if (gconst_try_read(srcr, r)) - dirty = 1; - } + i = guest_regs[r].vreg; + if (i >= 0 && cache_regs[i].type == HR_CACHED && cache_regs[i].hreg == dstr) + // r is already in arg, avoid evicting + dstid = i; else + dstid = rcache_get_vreg_arg(arg); + dstr = cache_regs[dstid].hreg; + + if (rcache_is_cached(r)) { + // r is needed later on anyway + srcr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); + keep = 1; + } else if ((guest_regs[r].flags & GRF_CDIRTY) && gconst_get(r, &val)) { + // r has an uncomitted const - load into arg, but keep constant uncomitted + srcr = dstr; + emith_move_r_imm(srcr, val); + keep = 0; + } else { + // must read from ctx + srcr = dstr; emith_ctx_read(srcr, r * 4); + keep = 1; + } -do_cache: - if (dstr != srcr) - emith_move_r_r(dstr, srcr); -#if 1 - else - dirty |= src_dirty; + if (cache_regs[dstid].type == HR_CACHED) + rcache_evict_vreg(dstid); - if (dirty) - // must clean, callers might want to modify the arg before call - emith_ctx_write(dstr, r * 4); -#else - if (dirty) - reg_temp[dstid].flags |= HRF_DIRTY; + cache_regs[dstid].type = HR_TEMP; + if (hr == NULL) { + if (dstr != srcr) + // arg is a copy of cached r + emith_move_r_r(dstr, srcr); + else if (keep && guest_regs[r].vreg < 0) + // keep arg as vreg for r + rcache_add_vreg_alias(dstid, r); + } else { + *hr = srcr; + if (dstr != srcr) // must lock srcr if not copied here + rcache_lock_vreg(reg_map_host[srcr]); + } + + cache_regs[dstid].stamp = ++rcache_counter; + rcache_lock_vreg(dstid); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after getarg"); #endif - - reg_temp[dstid].stamp = ++rcache_counter; - reg_temp[dstid].type = HR_CACHED; - reg_temp[dstid].greg = r; - reg_temp[dstid].flags |= HRF_LOCKED; return dstr; 
} static void rcache_free_tmp(int hr) { - int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].hreg == hr) - break; + int i = reg_map_host[hr]; - if (i == ARRAY_SIZE(reg_temp) || reg_temp[i].type != HR_TEMP) { - printf("rcache_free_tmp fail: #%i hr %d, type %d\n", i, hr, reg_temp[i].type); - return; + if (i < 0 || cache_regs[i].type != HR_TEMP) { + printf("rcache_free_tmp fail: #%i hr %d, type %d\n", i, hr, cache_regs[i].type); + exit(1); } - reg_temp[i].type = HR_FREE; - reg_temp[i].flags = 0; + rcache_unlock_vreg(i); } -static void rcache_unlock(int hr) +// saves temporary result either in REG or in drctmp +static int rcache_save_tmp(int hr) { int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].type == HR_CACHED && reg_temp[i].hreg == hr) - reg_temp[i].flags &= ~HRF_LOCKED; + + // find REG, either free or unlocked temp or oldest non-hinted cached + i = rcache_allocate_nontemp(); + if (i < 0) { + // if none is available, store in drctmp + emith_ctx_write(hr, offsetof(SH2, drc_tmp)); + rcache_free_tmp(hr); + return -1; + } + + cache_regs[i].type = HR_CACHED; + cache_regs[i].gregs = 0; // not storing any guest register + cache_regs[i].flags &= HRF_PINNED; + cache_regs[i].locked = 0; + cache_regs[i].stamp = ++rcache_counter; + rcache_lock_vreg(i); + emith_move_r_r(cache_regs[i].hreg, hr); + rcache_free_tmp(hr); + return i; +} + +static int rcache_restore_tmp(int x) +{ + int hr; + + // find REG with tmp store: cached but with no gregs + if (x >= 0) { + if (cache_regs[x].type != HR_CACHED || cache_regs[x].gregs) { + printf("invalid tmp storage %d\n", x); + exit(1); + } + // found, transform to a TEMP + cache_regs[x].type = HR_TEMP; + return cache_regs[x].hreg; + } + + // if not available, create a TEMP store and fetch from drctmp + hr = rcache_get_tmp(); + emith_ctx_read(hr, offsetof(SH2, drc_tmp)); + + return hr; +} + +static void rcache_free(int hr) +{ + int x = reg_map_host[hr]; + rcache_unlock_vreg(x); +} + +static void 
rcache_unlock(int x) +{ + if (x >= 0) + cache_regs[x].locked = 0; } static void rcache_unlock_all(void) { int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - reg_temp[i].flags &= ~HRF_LOCKED; + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + cache_regs[i].locked = 0; } -static inline u32 rcache_used_hreg_mask(void) +static void rcache_unpin_all(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + if (guest_regs[i].flags & GRF_PINNED) { + guest_regs[i].flags &= ~GRF_PINNED; + cache_regs[guest_regs[i].sreg].flags &= ~HRF_PINNED; + guest_regs[i].sreg = -1; + rcache_regs_pinned &= ~(1 << i); + } + } +#if DRC_DEBUG & 64 + RCACHE_CHECK("after unpin"); +#endif +} + +static void rcache_save_pinned(void) +{ + int i; + + // save pinned regs to context + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if ((guest_regs[i].flags & GRF_PINNED) && guest_regs[i].vreg >= 0) + emith_ctx_write(cache_regs[guest_regs[i].vreg].hreg, i * 4); +} + +static inline void rcache_set_usage_now(u32 mask) +{ + rcache_regs_now = mask; +} + +static inline void rcache_set_usage_soon(u32 mask) +{ + rcache_regs_soon = mask; +} + +static inline void rcache_set_usage_late(u32 mask) +{ + rcache_regs_late = mask; +} + +static inline void rcache_set_usage_discard(u32 mask) +{ + rcache_regs_discard = mask; +} + +static inline int rcache_is_cached(sh2_reg_e r) +{ + // is r in cache or needed RSN? + u32 rsc = rcache_regs_soon | rcache_regs_clean; + return (guest_regs[r].vreg >= 0 || (rsc & (1 << r))); +} + +static inline int rcache_is_hreg_used(int hr) +{ + int x = reg_map_host[hr]; + // is hr in use? 
+ return cache_regs[x].type != HR_FREE && + (cache_regs[x].type != HR_TEMP || cache_regs[x].locked); +} + +static inline u32 rcache_used_hregs_mask(void) { u32 mask = 0; int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].type != HR_FREE) - mask |= 1 << reg_temp[i].hreg; + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + if ((cache_regs[i].htype & HRT_TEMP) && cache_regs[i].type != HR_FREE && + (cache_regs[i].type != HR_TEMP || cache_regs[i].locked)) + mask |= 1 << cache_regs[i].hreg; return mask; } +static inline u32 rcache_dirty_mask(void) +{ + u32 mask = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) + if (guest_regs[i].flags & GRF_DIRTY) + mask |= 1 << i; + mask |= gconst_dirty_mask(); + + return mask; +} + +static inline u32 rcache_cached_mask(void) +{ + u32 mask = 0; + int i; + + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + if (cache_regs[i].type == HR_CACHED) + mask |= cache_regs[i].gregs; + + return mask; +} + +static void rcache_clean_tmp(void) +{ + int i; + + rcache_regs_clean = (1 << ARRAY_SIZE(guest_regs)) - 1; + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + if (cache_regs[i].type == HR_CACHED && (cache_regs[i].htype & HRT_TEMP)) { + rcache_unlock(i); + rcache_remap_vreg(i); + } + rcache_regs_clean = 0; +} + +static void rcache_clean_masked(u32 mask) +{ + int i, r, hr; + u32 m; + + rcache_regs_clean |= mask; + mask = rcache_regs_clean; + + // clean constants where all aliases are covered by the mask, exempt statics + // to avoid flushing them to context if sreg isn't available + m = mask & ~(rcache_regs_static | rcache_regs_pinned); + for (i = 0; i < ARRAY_SIZE(gconsts); i++) + if ((gconsts[i].gregs & m) && !(gconsts[i].gregs & ~mask)) { + FOR_ALL_BITS_SET_DO(gconsts[i].gregs, r, + if (guest_regs[r].flags & GRF_CDIRTY) { + hr = rcache_get_reg_(r, RC_GR_READ, 0, NULL); + rcache_clean_vreg(reg_map_host[hr]); + break; + }); + } + // clean vregs where all aliases are covered by the mask + for (i = 0; i < 
ARRAY_SIZE(cache_regs); i++) + if (cache_regs[i].type == HR_CACHED && + (cache_regs[i].gregs & mask) && !(cache_regs[i].gregs & ~mask)) + rcache_clean_vreg(i); +} + static void rcache_clean(void) { int i; gconst_clean(); - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) - if (reg_temp[i].type == HR_CACHED && (reg_temp[i].flags & HRF_DIRTY)) { - // writeback - emith_ctx_write(reg_temp[i].hreg, reg_temp[i].greg * 4); - reg_temp[i].flags &= ~HRF_DIRTY; + rcache_regs_clean = (1 << ARRAY_SIZE(guest_regs)) - 1; + for (i = ARRAY_SIZE(cache_regs)-1; i >= 0; i--) + if (cache_regs[i].type == HR_CACHED) + rcache_clean_vreg(i); + + // relocate statics to their sregs (necessary before conditional jumps) + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + if ((guest_regs[i].flags & (GRF_STATIC|GRF_PINNED)) && + guest_regs[i].vreg != guest_regs[i].sreg) { + rcache_lock_vreg(guest_regs[i].vreg); + rcache_evict_vreg(guest_regs[i].sreg); + rcache_unlock_vreg(guest_regs[i].vreg); + if (guest_regs[i].vreg < 0) + emith_ctx_read(cache_regs[guest_regs[i].sreg].hreg, i*4); + else { + emith_move_r_r(cache_regs[guest_regs[i].sreg].hreg, + cache_regs[guest_regs[i].vreg].hreg); + rcache_copy_x16(cache_regs[guest_regs[i].sreg].hreg, + cache_regs[guest_regs[i].vreg].hreg); + rcache_remove_vreg_alias(guest_regs[i].vreg, i); + } + cache_regs[guest_regs[i].sreg].gregs = 1 << i; + cache_regs[guest_regs[i].sreg].type = HR_CACHED; + cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY|HRF_PINNED; + guest_regs[i].flags |= GRF_DIRTY; + guest_regs[i].vreg = guest_regs[i].sreg; } + } + rcache_regs_clean = 0; +} + +static void rcache_invalidate_tmp(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { + if (cache_regs[i].htype & HRT_TEMP) { + rcache_unlock(i); + if (cache_regs[i].type == HR_CACHED) + rcache_evict_vreg(i); + else + rcache_free_vreg(i); + } + } } static void rcache_invalidate(void) { int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) { - reg_temp[i].type = HR_FREE; - 
reg_temp[i].flags = 0; - } - rcache_counter = 0; - gconst_invalidate(); + rcache_unlock_all(); + + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) + rcache_free_vreg(i); + + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + guest_regs[i].flags &= GRF_STATIC; + if (!(guest_regs[i].flags & GRF_STATIC)) + guest_regs[i].vreg = -1; + else { + cache_regs[guest_regs[i].sreg].gregs = 1 << i; + cache_regs[guest_regs[i].sreg].type = HR_CACHED; + cache_regs[guest_regs[i].sreg].flags |= HRF_DIRTY|HRF_PINNED; + guest_regs[i].flags |= GRF_DIRTY; + guest_regs[i].vreg = guest_regs[i].sreg; + } + } + + rcache_counter = 0; + rcache_regs_now = rcache_regs_soon = rcache_regs_late = 0; + rcache_regs_discard = rcache_regs_clean = 0; } static void rcache_flush(void) @@ -1060,250 +2517,503 @@ static void rcache_flush(void) rcache_invalidate(); } +static void rcache_create(void) +{ + int x = 0, i; + + // create cache_regs as host register representation + // RET_REG/params should be first TEMPs to avoid allocation conflicts in calls + cache_regs[x++] = (cache_reg_t) {.hreg = RET_REG, .htype = HRT_TEMP}; + for (i = 0; i < ARRAY_SIZE(hregs_param); i++) + if (hregs_param[i] != RET_REG) + cache_regs[x++] = (cache_reg_t){.hreg = hregs_param[i],.htype = HRT_TEMP}; + + for (i = 0; i < ARRAY_SIZE(hregs_temp); i++) + if (hregs_temp[i] != RET_REG) + cache_regs[x++] = (cache_reg_t){.hreg = hregs_temp[i], .htype = HRT_TEMP}; + + for (i = ARRAY_SIZE(hregs_saved)-1; i >= 0; i--) + if (hregs_saved[i] != CONTEXT_REG) + cache_regs[x++] = (cache_reg_t){.hreg = hregs_saved[i], .htype = HRT_REG}; + + if (x != ARRAY_SIZE(cache_regs)) { + printf("rcache_create failed (conflicting register count)\n"); + exit(1); + } + + // mapping from host_register to cache regs index + memset(reg_map_host, -1, sizeof(reg_map_host)); + for (i = 0; i < ARRAY_SIZE(cache_regs); i++) { + if (cache_regs[i].htype) + reg_map_host[cache_regs[i].hreg] = i; + if (cache_regs[i].htype == HRT_REG) + rcache_vregs_reg |= (1 << i); + } + + // 
create static host register mapping for SH2 regs + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + guest_regs[i] = (guest_reg_t){.sreg = -1}; + } + for (i = 0; i < ARRAY_SIZE(regs_static); i += 2) { + for (x = ARRAY_SIZE(cache_regs)-1; x >= 0; x--) + if (cache_regs[x].hreg == regs_static[i+1]) break; + if (x >= 0) { + guest_regs[regs_static[i]] = (guest_reg_t){.flags = GRF_STATIC,.sreg = x}; + rcache_regs_static |= (1 << regs_static[i]); + rcache_vregs_reg &= ~(1 << x); + } + } + + printf("DRC registers created, %ld host regs (%d REG, %d STATIC, 1 CTX)\n", + CACHE_REGS+1L, count_bits(rcache_vregs_reg),count_bits(rcache_regs_static)); +} + +static void rcache_init(void) +{ + // create DRC data structures + rcache_create(); + + rcache_invalidate(); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after init"); +#endif +} + // --------------------------------------------------------------- -static int emit_get_rbase_and_offs(u32 a, u32 *offs) +// swap 32 bit value read from mem in generated code (same as CPU_BE2) +static void emit_le_swap(int cond, int r) { - u32 mask = 0; - int poffs; - int hr; +#if CPU_IS_LE + if (cond == -1) + emith_ror(r, r, 16); + else + emith_ror_c(cond, r, r, 16); +#endif +} - poffs = dr_ctx_get_mem_ptr(a, &mask); +// fix memory byte ptr in generated code (same as MEM_BE2) +static void emit_le_ptr8(int cond, int r) +{ +#if CPU_IS_LE + if (cond == -1) + emith_eor_r_imm_ptr(r, 1); + else + emith_eor_r_imm_ptr_c(cond, r, 1); +#endif +} + +// split address by mask, in base part (upper) and offset (lower, signed!) +static uptr split_address(uptr la, uptr mask, s32 *offs) +{ + uptr sign = (mask>>1) + 1; // sign bit in offset + *offs = (la & mask) | (la & sign ? ~mask : 0); // offset part, sign extended + la = (la & ~mask) + ((la & sign) << 1); // base part, corrected for offs sign +#ifdef __arm__ + // arm32 offset has an add/sub flag and an unsigned 8 bit value, which only + // allows values of [-255...255]. the value -256 thus can't be used. 
+ if (*offs < 0) { // TODO not working at all with negative offsets on ARM? + //if (*offs == -sign) { + la -= sign; + *offs += sign; + } +#endif + return la; +} + +// NB may return either REG or TEMP +static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, s32 *offs) +{ + uptr omask = emith_rw_offs_max(); // offset mask + u32 mask = 0; + u32 a; + int poffs; + int hr, hr2; + uptr la; + + // is r constant and points to a memory region? + if (! gconst_get(r, &a)) + return -1; + poffs = dr_ctx_get_mem_ptr(sh2, a + *offs, &mask); if (poffs == -1) return -1; - // XXX: could use some related reg - hr = rcache_get_tmp(); - emith_ctx_read(hr, poffs); - emith_add_r_imm(hr, a & mask & ~0xff); - *offs = a & 0xff; // XXX: ARM oriented.. + if (mask < 0x20000) { + // data array, BIOS, DRAM, can't safely access directly since host addr may + // change (BIOS,da code may run on either core, DRAM may be switched) + hr = rcache_get_tmp(); + a = (a + *offs) & mask; + if (poffs == offsetof(SH2, p_da)) { + // access sh2->data_array directly + a = split_address(a + offsetof(SH2, data_array), omask, offs); + emith_add_r_r_ptr_imm(hr, CONTEXT_REG, a); + } else { + a = split_address(a, omask, offs); + emith_ctx_read_ptr(hr, poffs); + if (a) + emith_add_r_r_ptr_imm(hr, hr, a); + } + return hr; + } + + // ROM, SDRAM. Host address should be mmapped to be equal to SH2 address. + la = (uptr)*(void **)((char *)sh2 + poffs); + + // if r is in rcache or needed soon anyway, and offs is relative to region, + // and address translation fits in add_ptr_imm (s32), then use rcached const + if (la == (s32)la && !(((a & mask) + *offs) & ~mask) && rcache_is_cached(r)) { +#if CPU_IS_LE // need to fix odd address for correct byte addressing + if (a & 1) *offs += (*offs&1) ? 
2 : -2; +#endif + la -= (s32)((a & ~mask) - *offs); // diff between reg and memory + hr = hr2 = rcache_get_reg(r, rmode, NULL); + if ((s32)a < 0) emith_uext_ptr(hr2); + la = split_address(la, omask, offs); + if (la) { + hr = rcache_get_tmp(); + emith_add_r_r_ptr_imm(hr, hr2, la); + rcache_free(hr2); + } + } else { + // known fixed host address + la = split_address(la + ((a + *offs) & mask), omask, offs); + if (la == 0) { + // offset only. optimize for hosts having short indexed addressing + la = *offs & ~0x7f; // keep the lower bits for endianess handling + *offs &= 0x7f; + } + hr = rcache_get_tmp(); + emith_move_r_ptr_imm(hr, la); + } return hr; } +// read const data from const ROM address +static int emit_get_rom_data(SH2 *sh2, sh2_reg_e r, s32 offs, int size, u32 *val) +{ + u32 a, mask; + + *val = 0; + if (gconst_get(r, &a)) { + a += offs; + // check if rom is memory mapped (not bank switched), and address is in rom + if (p32x_sh2_mem_is_rom(a, sh2) && p32x_sh2_get_mem_ptr(a, &mask, sh2) == sh2->p_rom) { + switch (size & MF_SIZEMASK) { + case 0: *val = (s8)p32x_sh2_read8(a, sh2s); break; // 8 + case 1: *val = (s16)p32x_sh2_read16(a, sh2s); break; // 16 + case 2: *val = p32x_sh2_read32(a, sh2s); break; // 32 + } + return 1; + } + } + return 0; +} + static void emit_move_r_imm32(sh2_reg_e dst, u32 imm) { #if PROPAGATE_CONSTANTS gconst_new(dst, imm); #else - int hr = rcache_get_reg(dst, RC_GR_WRITE); + int hr = rcache_get_reg(dst, RC_GR_WRITE, NULL); emith_move_r_imm(hr, imm); #endif } static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src) { - int hr_d = rcache_get_reg(dst, RC_GR_WRITE); - int hr_s = rcache_get_reg(src, RC_GR_READ); - - emith_move_r_r(hr_d, hr_s); -} - -// T must be clear, and comparison done just before this -static void emit_or_t_if_eq(int srr) -{ - EMITH_SJMP_START(DCOND_NE); - emith_or_r_imm_c(DCOND_EQ, srr, T); - EMITH_SJMP_END(DCOND_NE); -} - -// arguments must be ready -// reg cache must be clean before call -static int 
emit_memhandler_read_(int size, int ram_check) -{ - int arg1; -#if 0 - int arg0; - host_arg2reg(arg0, 0); -#endif - - rcache_clean(); - - // must writeback cycles for poll detection stuff - // FIXME: rm - if (reg_map_g2h[SHR_SR] != -1) - emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); - - arg1 = rcache_get_tmp_arg(1); - emith_move_r_r(arg1, CONTEXT_REG); - -#if 0 // can't do this because of unmapped reads - // ndef PDB_NET - if (ram_check && Pico.rom == (void *)0x02000000 && Pico32xMem->sdram == (void *)0x06000000) { - int tmp = rcache_get_tmp(); - emith_and_r_r_imm(tmp, arg0, 0xfb000000); - emith_cmp_r_imm(tmp, 0x02000000); - switch (size) { - case 0: // 8 - EMITH_SJMP3_START(DCOND_NE); - emith_eor_r_imm_c(DCOND_EQ, arg0, 1); - emith_read8_r_r_offs_c(DCOND_EQ, arg0, arg0, 0); - EMITH_SJMP3_MID(DCOND_NE); - emith_call_cond(DCOND_NE, sh2_drc_read8); - EMITH_SJMP3_END(); - break; - case 1: // 16 - EMITH_SJMP3_START(DCOND_NE); - emith_read16_r_r_offs_c(DCOND_EQ, arg0, arg0, 0); - EMITH_SJMP3_MID(DCOND_NE); - emith_call_cond(DCOND_NE, sh2_drc_read16); - EMITH_SJMP3_END(); - break; - case 2: // 32 - EMITH_SJMP3_START(DCOND_NE); - emith_read_r_r_offs_c(DCOND_EQ, arg0, arg0, 0); - emith_ror_c(DCOND_EQ, arg0, arg0, 16); - EMITH_SJMP3_MID(DCOND_NE); - emith_call_cond(DCOND_NE, sh2_drc_read32); - EMITH_SJMP3_END(); - break; - } + if (gconst_check(src) || rcache_is_cached(src)) + rcache_alias_vreg(dst, src); + else { + int hr_d = rcache_get_reg(dst, RC_GR_WRITE, NULL); + emith_ctx_read(hr_d, src * 4); } - else -#endif - { - switch (size) { - case 0: // 8 - emith_call(sh2_drc_read8); - break; - case 1: // 16 - emith_call(sh2_drc_read16); - break; - case 2: // 32 - emith_call(sh2_drc_read32); - break; - } - } - rcache_invalidate(); - - if (reg_map_g2h[SHR_SR] != -1) - emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); - - // assuming arg0 and retval reg matches - return rcache_get_tmp_arg(0); } +static void emit_add_r_imm(sh2_reg_e r, u32 imm) +{ + u32 val; + int isgc = 
gconst_get(r, &val); + int hr, hr2; + + if (!isgc || rcache_is_cached(r)) { + // not constant, or r is already in cache + hr = rcache_get_reg(r, RC_GR_RMW, &hr2); + emith_add_r_r_imm(hr, hr2, imm); + rcache_free(hr2); + if (isgc) + gconst_set(r, val + imm); + } else + gconst_new(r, val + imm); +} + +static void emit_sub_r_imm(sh2_reg_e r, u32 imm) +{ + u32 val; + int isgc = gconst_get(r, &val); + int hr, hr2; + + if (!isgc || rcache_is_cached(r)) { + // not constant, or r is already in cache + hr = rcache_get_reg(r, RC_GR_RMW, &hr2); + emith_sub_r_r_imm(hr, hr2, imm); + rcache_free(hr2); + if (isgc) + gconst_set(r, val - imm); + } else + gconst_new(r, val - imm); +} + +static void emit_sync_t_to_sr(void) +{ + // avoid reloading SR from context if there's nothing to do + if (emith_get_t_cond() >= 0) { + int sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); + } +} + +// rd = @(arg0) static int emit_memhandler_read(int size) { - return emit_memhandler_read_(size, 1); -} + int hr; -static int emit_memhandler_read_rr(sh2_reg_e rd, sh2_reg_e rs, u32 offs, int size) -{ - int hr, hr2, ram_check = 1; - u32 val, offs2; + emit_sync_t_to_sr(); + rcache_clean_tmp(); +#ifndef DRC_SR_REG + // must writeback cycles for poll detection stuff + if (guest_regs[SHR_SR].vreg != -1) + rcache_unmap_vreg(guest_regs[SHR_SR].vreg); +#endif + rcache_invalidate_tmp(); - if (gconst_get(rs, &val)) { - hr = emit_get_rbase_and_offs(val + offs, &offs2); - if (hr != -1) { - hr2 = rcache_get_reg(rd, RC_GR_WRITE); - switch (size) { - case 0: // 8 - emith_read8_r_r_offs(hr2, hr, offs2 ^ 1); - emith_sext(hr2, hr2, 8); - break; - case 1: // 16 - emith_read16_r_r_offs(hr2, hr, offs2); - emith_sext(hr2, hr2, 16); - break; - case 2: // 32 - emith_read_r_r_offs(hr2, hr, offs2); - emith_ror(hr2, hr2, 16); - break; - } - rcache_free_tmp(hr); - return hr2; + if (size & MF_POLLING) + switch (size & MF_SIZEMASK) { + case 0: emith_call(sh2_drc_read8_poll); break; // 8 + case 1: 
emith_call(sh2_drc_read16_poll); break; // 16 + case 2: emith_call(sh2_drc_read32_poll); break; // 32 + } + else + switch (size & MF_SIZEMASK) { + case 0: emith_call(sh2_drc_read8); break; // 8 + case 1: emith_call(sh2_drc_read16); break; // 16 + case 2: emith_call(sh2_drc_read32); break; // 32 } - ram_check = 0; + hr = rcache_get_tmp_ret(); + rcache_set_x16(hr, (size & MF_SIZEMASK) < 2, 0); + return hr; +} + +// @(arg0) = arg1 +static void emit_memhandler_write(int size) +{ + emit_sync_t_to_sr(); + rcache_clean_tmp(); +#ifndef DRC_SR_REG + if (guest_regs[SHR_SR].vreg != -1) + rcache_unmap_vreg(guest_regs[SHR_SR].vreg); +#endif + rcache_invalidate_tmp(); + + switch (size & MF_SIZEMASK) { + case 0: emith_call(sh2_drc_write8); break; // 8 + case 1: emith_call(sh2_drc_write16); break; // 16 + case 2: emith_call(sh2_drc_write32); break; // 32 + } +} + +// rd = @(Rs,#offs); rd < 0 -> return a temp +static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, s32 offs, int size) +{ + int hr, hr2; + u32 val; + +#if PROPAGATE_CONSTANTS + if (emit_get_rom_data(sh2, rs, offs, size, &val)) { + if (rd == SHR_TMP) { + hr2 = rcache_get_tmp(); + emith_move_r_imm(hr2, val); + } else { + emit_move_r_imm32(rd, val); + hr2 = rcache_get_reg(rd, RC_GR_RMW, NULL); + } + rcache_set_x16(hr2, val == (s16)val, val == (u16)val); + if (size & MF_POSTINCR) + emit_add_r_imm(rs, 1 << (size & MF_SIZEMASK)); + return hr2; } - hr = rcache_get_reg_arg(0, rs); - if (offs != 0) - emith_add_r_imm(hr, offs); - hr = emit_memhandler_read_(size, ram_check); - hr2 = rcache_get_reg(rd, RC_GR_WRITE); - if (size != 2) { - emith_sext(hr2, hr, (size == 1) ? 16 : 8); - } else - emith_move_r_r(hr2, hr); - rcache_free_tmp(hr); + val = size & MF_POSTINCR; + hr = emit_get_rbase_and_offs(sh2, rs, val ? 
RC_GR_RMW : RC_GR_READ, &offs); + if (hr != -1) { + if (rd == SHR_TMP) + hr2 = rcache_get_tmp(); + else + hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); + switch (size & MF_SIZEMASK) { + case 0: emith_read8s_r_r_offs(hr2, hr, MEM_BE2(offs)); break; // 8 + case 1: emith_read16s_r_r_offs(hr2, hr, offs); break; // 16 + case 2: emith_read_r_r_offs(hr2, hr, offs); emit_le_swap(-1, hr2); break; + } + rcache_free(hr); + if (size & MF_POSTINCR) + emit_add_r_imm(rs, 1 << (size & MF_SIZEMASK)); + return hr2; + } +#endif + if (gconst_get(rs, &val) && !rcache_is_cached(rs)) { + hr = rcache_get_tmp_arg(0); + emith_move_r_imm(hr, val + offs); + if (size & MF_POSTINCR) + gconst_new(rs, val + (1 << (size & MF_SIZEMASK))); + } else if (size & MF_POSTINCR) { + hr = rcache_get_tmp_arg(0); + hr2 = rcache_get_reg(rs, RC_GR_RMW, NULL); + emith_add_r_r_imm(hr, hr2, offs); + emith_add_r_imm(hr2, 1 << (size & MF_SIZEMASK)); + if (gconst_get(rs, &val)) + gconst_set(rs, val + (1 << (size & MF_SIZEMASK))); + } else { + hr = rcache_get_reg_arg(0, rs, &hr2); + if (offs || hr != hr2) + emith_add_r_r_imm(hr, hr2, offs); + } + hr = emit_memhandler_read(size); + + if (rd == SHR_TMP) + hr2 = hr; + else + hr2 = rcache_map_reg(rd, hr); + + if (hr != hr2) { + emith_move_r_r(hr2, hr); + rcache_free_tmp(hr); + } return hr2; } -static void emit_memhandler_write(int size) +// @(Rs,#offs) = rd; rd < 0 -> write arg1 +static void emit_memhandler_write_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, s32 offs, int size) { - int ctxr; - host_arg2reg(ctxr, 2); - if (reg_map_g2h[SHR_SR] != -1) - emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); + int hr, hr2; + u32 val; - rcache_clean(); + if (rd == SHR_TMP) { + host_arg2reg(hr2, 1); // already locked and prepared by caller + } else if ((size & MF_PREDECR) && rd == rs) { // must avoid caching rd in arg1 + hr2 = rcache_get_reg_arg(1, rd, &hr); + if (hr != hr2) { + emith_move_r_r(hr2, hr); + rcache_free(hr2); + } + } else + hr2 = rcache_get_reg_arg(1, rd, NULL); + if (rd != 
SHR_TMP) + rcache_unlock(guest_regs[rd].vreg); // unlock in case rd is in arg0 - switch (size) { - case 0: // 8 - // XXX: consider inlining sh2_drc_write8 - emith_call(sh2_drc_write8); - break; - case 1: // 16 - emith_call(sh2_drc_write16); - break; - case 2: // 32 - emith_move_r_r(ctxr, CONTEXT_REG); - emith_call(sh2_drc_write32); - break; + if (gconst_get(rs, &val) && !rcache_is_cached(rs)) { + hr = rcache_get_tmp_arg(0); + if (size & MF_PREDECR) { + val -= 1 << (size & MF_SIZEMASK); + gconst_new(rs, val); + } + emith_move_r_imm(hr, val + offs); + } else if (offs || (size & MF_PREDECR)) { + if (size & MF_PREDECR) + emit_sub_r_imm(rs, 1 << (size & MF_SIZEMASK)); + rcache_unlock(guest_regs[rs].vreg); // unlock in case rs is in arg0 + hr = rcache_get_reg_arg(0, rs, &hr2); + if (offs || hr != hr2) + emith_add_r_r_imm(hr, hr2, offs); + } else + hr = rcache_get_reg_arg(0, rs, NULL); + + emit_memhandler_write(size); +} + +// rd = @(Rx,Ry); rd < 0 -> return a temp +static int emit_indirect_indexed_read(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size) +{ + int hr, hr2; + int tx, ty; +#if PROPAGATE_CONSTANTS + u32 offs; + + // if offs is larger than 0x01000000, it's most probably the base address part + if (gconst_get(ry, &offs) && offs < 0x01000000) + return emit_memhandler_read_rr(sh2, rd, rx, offs, size); + if (gconst_get(rx, &offs) && offs < 0x01000000) + return emit_memhandler_read_rr(sh2, rd, ry, offs, size); +#endif + hr = rcache_get_reg_arg(0, rx, &tx); + ty = rcache_get_reg(ry, RC_GR_READ, NULL); + emith_add_r_r_r(hr, tx, ty); + hr = emit_memhandler_read(size); + + if (rd == SHR_TMP) + hr2 = hr; + else + hr2 = rcache_map_reg(rd, hr); + + if (hr != hr2) { + emith_move_r_r(hr2, hr); + rcache_free_tmp(hr); } - - rcache_invalidate(); - if (reg_map_g2h[SHR_SR] != -1) - emith_ctx_read(reg_map_g2h[SHR_SR], SHR_SR * 4); + return hr2; } -// @(Rx,Ry) -static int emit_indirect_indexed_read(int rx, int ry, int size) +// @(Rx,Ry) = rd; rd < 0 -> write arg1 +static 
void emit_indirect_indexed_write(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_reg_e ry, int size) { - int a0, t; - a0 = rcache_get_reg_arg(0, rx); - t = rcache_get_reg(ry, RC_GR_READ); - emith_add_r_r(a0, t); - return emit_memhandler_read(size); + int hr, tx, ty; +#if PROPAGATE_CONSTANTS + u32 offs; + + // if offs is larger than 0x01000000, it's most probably the base address part + if (gconst_get(ry, &offs) && offs < 0x01000000) + return emit_memhandler_write_rr(sh2, rd, rx, offs, size); + if (gconst_get(rx, &offs) && offs < 0x01000000) + return emit_memhandler_write_rr(sh2, rd, ry, offs, size); +#endif + if (rd != SHR_TMP) + rcache_get_reg_arg(1, rd, NULL); + hr = rcache_get_reg_arg(0, rx, &tx); + ty = rcache_get_reg(ry, RC_GR_READ, NULL); + emith_add_r_r_r(hr, tx, ty); + emit_memhandler_write(size); } -// read @Rn, @rm -static void emit_indirect_read_double(u32 *rnr, u32 *rmr, int rn, int rm, int size) +// @Rn+,@Rm+ +static void emit_indirect_read_double(SH2 *sh2, int *rnr, int *rmr, sh2_reg_e rn, sh2_reg_e rm, int size) { int tmp; - rcache_get_reg_arg(0, rn); - tmp = emit_memhandler_read(size); - emith_ctx_write(tmp, offsetof(SH2, drc_tmp)); - rcache_free_tmp(tmp); - tmp = rcache_get_reg(rn, RC_GR_RMW); - emith_add_r_imm(tmp, 1 << size); - rcache_unlock(tmp); - - rcache_get_reg_arg(0, rm); - *rmr = emit_memhandler_read(size); - *rnr = rcache_get_tmp(); - emith_ctx_read(*rnr, offsetof(SH2, drc_tmp)); - tmp = rcache_get_reg(rm, RC_GR_RMW); - emith_add_r_imm(tmp, 1 << size); - rcache_unlock(tmp); + // unlock rn, rm here to avoid REG shortage in MAC operation + tmp = emit_memhandler_read_rr(sh2, SHR_TMP, rn, 0, size | MF_POSTINCR); + rcache_unlock(guest_regs[rn].vreg); + tmp = rcache_save_tmp(tmp); + *rmr = emit_memhandler_read_rr(sh2, SHR_TMP, rm, 0, size | MF_POSTINCR); + rcache_unlock(guest_regs[rm].vreg); + *rnr = rcache_restore_tmp(tmp); } static void emit_do_static_regs(int is_write, int tmpr) { int i, r, count; - for (i = 0; i < ARRAY_SIZE(reg_map_g2h); i++) { 
- r = reg_map_g2h[i]; - if (r == -1) + for (i = 0; i < ARRAY_SIZE(guest_regs); i++) { + if (guest_regs[i].flags & (GRF_STATIC|GRF_PINNED)) + r = cache_regs[guest_regs[i].vreg].hreg; + else continue; - for (count = 1; i < ARRAY_SIZE(reg_map_g2h) - 1; i++, r++) { - if (reg_map_g2h[i + 1] != r + 1) + for (count = 1; i < ARRAY_SIZE(guest_regs) - 1; i++, r++) { + if ((guest_regs[i + 1].flags & (GRF_STATIC|GRF_PINNED)) && + cache_regs[guest_regs[i + 1].vreg].hreg == r + 1) + count++; + else break; - count++; } if (count > 1) { @@ -1321,131 +3031,431 @@ static void emit_do_static_regs(int is_write, int tmpr) } } -static void emit_block_entry(void) +#if DIV_OPTIMIZER +// divide operation replacement functions, called by compiled code. Only the +// 32:16 cases and the 64:32 cases described in the SH2 prog man are replaced. + +// This is surprisingly difficult since the SH2 division operation is generating +// the result in the dividend during the operation, leaving some remainder-like +// stuff in the bits unused for the result, and leaving the T and Q status bits +// in a state depending on the operands and the result. Q always reflects the +// last result bit generated (i.e. bit 0 of the result). For T: +// 32:16 T = top bit of the 16 bit remainder-like +// 64:32 T = resulting T of the DIV0U/S operation +// The remainder-like depends on outcome of the last generated result bit. 
+ +static uint32_t REGPARM(3) sh2_drc_divu32(uint32_t dv, uint32_t *dt, uint32_t ds) { - int arg0; + if (likely(ds > dv && (uint16_t)ds == 0)) { + // good case: no overflow, divisor not 0, lower 16 bits 0 + uint32_t quot = dv / (ds>>16), rem = dv - (quot * (ds>>16)); + if (~quot&1) rem -= ds>>16; + *dt = (rem>>15) & 1; + return (uint16_t)quot | ((2*rem + (quot>>31)) << 16); + } else { + // bad case: use the sh2 algo to get the right result + int q = 0, t = 0, s = 16; + while (s--) { + uint32_t v = dv>>31; + dv = (dv<<1) | t; + t = v; + v = dv; + if (q) dv += ds, q = dv < v; + else dv -= ds, q = dv > v; + q ^= t, t = !q; + } + *dt = dv>>31; + return (dv<<1) | t; + } +} - host_arg2reg(arg0, 0); +static uint32_t REGPARM(3) sh2_drc_divu64(uint32_t dh, uint32_t *dl, uint32_t ds) +{ + uint64_t dv = *dl | ((uint64_t)dh << 32); + if (likely(ds > dh)) { + // good case: no overflow, divisor not 0 + uint32_t quot = dv / ds, rem = dv - ((uint64_t)quot * ds); + if (~quot&1) rem -= ds; + *dl = quot; + return rem; + } else { + // bad case: use the sh2 algo to get the right result + int q = 0, t = 0, s = 32; + while (s--) { + uint64_t v = dv>>63; + dv = (dv<<1) | t; + t = v; + v = dv; + if (q) dv += ((uint64_t)ds << 32), q = dv < v; + else dv -= ((uint64_t)ds << 32), q = dv > v; + q ^= t, t = !q; + } + *dl = (dv<<1) | t; + return (dv>>32); + } +} -#if (DRC_DEBUG & 8) || defined(PDB) - int arg1, arg2; - host_arg2reg(arg1, 1); - host_arg2reg(arg2, 2); +static uint32_t REGPARM(3) sh2_drc_divs32(int32_t dv, uint32_t *dt, int32_t ds) +{ + uint32_t adv = abs(dv), ads = abs(ds)>>16; + if (likely(ads > adv>>16 && ds != 0x80000000 && (int16_t)ds == 0)) { + // good case: no overflow, divisor not 0 and not MIN_INT, lower 16 bits 0 + uint32_t quot = adv / ads, rem = adv - (quot * ads); + int m1 = (rem ? dv^ds : ds) < 0; + if (rem && dv < 0) rem = (quot&1 ? -rem : +ads-rem); + else rem = (quot&1 ? +rem : -ads+rem); + quot = ((dv^ds)<0 ? 
-quot : +quot) - m1; + *dt = (rem>>15) & 1; + return (uint16_t)quot | ((2*rem + (quot>>31)) << 16); + } else { + // bad case: use the sh2 algo to get the right result + int m = (uint32_t)ds>>31, q = (uint32_t)dv>>31, t = m^q, s = 16; + while (s--) { + uint32_t v = (uint32_t)dv>>31; + dv = (dv<<1) | t; + t = v; + v = dv; + if (m^q) dv += ds, q = (uint32_t)dv < v; + else dv -= ds, q = (uint32_t)dv > v; + q ^= m^t, t = !(m^q); + } + *dt = (uint32_t)dv>>31; + return (dv<<1) | t; + } +} - emit_do_static_regs(1, arg2); - emith_move_r_r(arg1, CONTEXT_REG); - emith_move_r_r(arg2, rcache_get_reg(SHR_SR, RC_GR_READ)); - emith_call(sh2_drc_log_entry); - rcache_invalidate(); +static uint32_t REGPARM(3) sh2_drc_divs64(int32_t dh, uint32_t *dl, int32_t ds) +{ + int64_t _dv = *dl | ((int64_t)dh << 32); + uint32_t ads = abs(ds); + if (likely(_dv >= 0 && ads > _dv>>32 && ds != 0x80000000) || + likely(_dv < 0 && ads > -_dv>>32 && ds != 0x80000000)) { + uint64_t adv = (_dv < 0 ? -_dv : _dv); // no llabs in older toolchains + // good case: no overflow, divisor not 0 and not MIN_INT + uint32_t quot = adv / ads, rem = adv - ((uint64_t)quot * ads); + int m1 = (rem ? dh^ds : ds) < 0; + if (rem && dh < 0) rem = (quot&1 ? -rem : +ads-rem); + else rem = (quot&1 ? +rem : -ads+rem); + quot = ((dh^ds)<0 ? 
-quot : +quot) - m1; + *dl = quot; + return rem; + } else { + // bad case: use the sh2 algo to get the right result + uint64_t dv = (uint64_t)_dv; + int m = (uint32_t)ds>>31, q = (uint64_t)dv>>63, t = m^q, s = 32; + while (s--) { + uint64_t v = (uint64_t)dv>>63; + dv = (dv<<1) | t; + t = v; + v = dv; + if (m^q) dv += ((uint64_t)ds << 32), q = dv < v; + else dv -= ((uint64_t)ds << 32), q = dv > v; + q ^= m^t, t = !(m^q); + } + *dl = (dv<<1) | t; + return (dv>>32); + } +} #endif - emith_tst_r_r(arg0, arg0); - EMITH_SJMP_START(DCOND_EQ); - emith_jump_reg_c(DCOND_NE, arg0); - EMITH_SJMP_END(DCOND_EQ); + +// block local link stuff +struct linkage { + u32 pc; + void *ptr; + struct block_link *bl; + u32 mask; +}; + +static inline int find_in_linkage(const struct linkage *array, int size, u32 pc) +{ + size_t i; + for (i = 0; i < size; i++) + if (pc == array[i].pc) + return i; + + return -1; +} + +static int find_in_sorted_linkage(const struct linkage *array, int size, u32 pc) +{ + // binary search in sorted array + int left = 0, right = size-1; + while (left <= right) + { + int middle = (left + right) / 2; + if (array[middle].pc == pc) + return middle; + else if (array[middle].pc < pc) + left = middle + 1; + else + right = middle - 1; + } + return -1; +} + +static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tcache_id, + const struct linkage *targets, int target_count, + const struct linkage *links, int link_count) +{ + struct block_link *bl; + int u, v, tmp; + + emith_flush(); + for (u = 0; u < link_count; u++) { + emith_pool_check(); + // look up local branch targets + if (links[u].mask & 0x2) { + v = find_in_sorted_linkage(targets, target_count, links[u].pc); + if (v < 0 || ! 
targets[v].ptr) { + // forward branch not yet resolved, prepare external linking + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + bl = dr_prepare_ext_branch(block->entryp, links[u].pc, sh2->is_slave, tcache_id); + if (bl) + bl->type = BL_LDJMP; + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, links[u].pc); + rcache_free_tmp(tmp); + emith_jump_patchable(sh2_drc_dispatcher); + } else if (emith_jump_patch_inrange(links[u].ptr, targets[v].ptr)) { + // inrange local branch + emith_jump_patch(links[u].ptr, targets[v].ptr, NULL); + } else { + // far local branch + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + emith_jump(targets[v].ptr); + } + } else { + // external or exit, emit blx area entry + void *target = (links[u].mask & 0x1 ? sh2_drc_exit : sh2_drc_dispatcher); + if (links[u].bl) + links[u].bl->blx = tcache_ptr; + emith_jump_patch(links[u].ptr, tcache_ptr, NULL); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, links[u].pc & ~1); + rcache_free_tmp(tmp); + emith_jump(target); + } + } } #define DELAY_SAVE_T(sr) { \ + int t_ = rcache_get_tmp(); \ emith_bic_r_imm(sr, T_save); \ - emith_tst_r_imm(sr, T); \ - EMITH_SJMP_START(DCOND_EQ); \ - emith_or_r_imm_c(DCOND_NE, sr, T_save); \ - EMITH_SJMP_END(DCOND_EQ); \ + emith_and_r_r_imm(t_, sr, 1); \ + emith_or_r_r_lsl(sr, t_, T_SHIFT); \ + rcache_free_tmp(t_); \ } #define FLUSH_CYCLES(sr) \ - if (cycles > 0) { \ + if (cycles > 0) \ emith_sub_r_imm(sr, cycles << 12); \ - cycles = 0; \ - } + else if (cycles < 0) /* may happen after a branch not taken */ \ + emith_add_r_imm(sr, -cycles << 12); \ + cycles = 0; \ -static void *dr_get_pc_base(u32 pc, int is_slave); +static void *dr_get_pc_base(u32 pc, SH2 *sh2); +static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free); static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { - u32 branch_target_pc[MAX_LOCAL_BRANCHES]; - void *branch_target_ptr[MAX_LOCAL_BRANCHES]; + // branch targets in current block + static struct linkage 
branch_targets[MAX_LOCAL_TARGETS]; int branch_target_count = 0; - void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; - u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; - int branch_patch_count = 0; - u32 literal_addr[MAX_LITERALS]; - int literal_addr_count = 0; - u8 op_flags[BLOCK_INSN_LIMIT]; - struct { + // unresolved local or external targets with block link/exit area if needed + static struct linkage blx_targets[MAX_LOCAL_BRANCHES]; + int blx_target_count = 0; + + static u8 op_flags[BLOCK_INSN_LIMIT]; + + enum flg_states { FLG_UNKNOWN, FLG_UNUSED, FLG_0, FLG_1 }; + struct drcf { + int delay_reg:8; + u32 loop_type:8; + u32 polling:8; + u32 pinning:1; u32 test_irq:1; u32 pending_branch_direct:1; u32 pending_branch_indirect:1; - u32 literals_disabled:1; + u32 Tflag:2, Mflag:2; } drcf = { 0, }; +#if LOOP_OPTIMIZER + // loops with pinned registers for optimzation + // pinned regs are like statics and don't need saving/restoring inside a loop + static struct linkage pinned_loops[MAX_LOCAL_TARGETS/16]; + int pinned_loop_count = 0; +#endif + // PC of current, first, last SH2 insn u32 pc, base_pc, end_pc; - u32 end_literals; - void *block_entry_ptr; + u32 base_literals, end_literals; + u8 *block_entry_ptr; struct block_desc *block; + struct block_entry *entry; + struct block_link *bl; u16 *dr_pc_base; struct op_data *opd; int blkid_main = 0; int skip_op = 0; - u32 tmp, tmp2; + int tmp, tmp2; int cycles; int i, v; + u32 u, m1, m2, m3, m4; int op; + u16 crc; base_pc = sh2->pc; - drcf.literals_disabled = literal_disabled_frames != 0; // get base/validate PC - dr_pc_base = dr_get_pc_base(base_pc, sh2->is_slave); + dr_pc_base = dr_get_pc_base(base_pc, sh2); if (dr_pc_base == (void *)-1) { - printf("invalid PC, aborting: %08x\n", base_pc); + printf("invalid PC, aborting: %08lx\n", (long)base_pc); // FIXME: be less destructive exit(1); } - tcache_ptr = tcache_ptrs[tcache_id]; + // initial passes to disassemble and analyze the block + crc = scan_block(base_pc, sh2->is_slave, op_flags, 
&end_pc, &base_literals, &end_literals); + end_literals = dr_check_nolit(base_literals, end_literals, tcache_id); + if (base_literals == end_literals) // map empty lit section to end of code + base_literals = end_literals = end_pc; - // predict tcache overflow - tmp = tcache_ptr - tcache_bases[tcache_id]; - if (tmp > tcache_sizes[tcache_id] - MAX_BLOCK_SIZE) { - dbg(1, "tcache %d overflow", tcache_id); - return NULL; + // if there is already a translated but inactive block, reuse it + block = dr_find_inactive_block(tcache_id, crc, base_pc, end_pc - base_pc, + base_literals, end_literals - base_literals); + +#if (DRC_DEBUG & (256|512)) + // remove any (partial) old blocks which might get in the way, to make sure + // the same branch targets are used in the recording/playback code. Not needed + // normally since the SH2 code wasn't overwritten and should be the same. + sh2_smc_rm_blocks(base_pc, end_pc - base_pc, tcache_id, 0); +#endif + + if (block) { + dbg(2, "== %csh2 reuse block %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 
's' : 'm', + base_pc, end_pc, base_literals, end_literals, block->entryp->tcache_ptr); + dr_activate_block(block, tcache_id, sh2->is_slave); + emith_update_cache(); + return block->entryp[0].tcache_ptr; } - // initial passes to disassemble and analyze the block - scan_block(base_pc, sh2->is_slave, op_flags, &end_pc, &end_literals); + // collect branch_targets that don't land on delay slots + m1 = m2 = m3 = m4 = v = op = 0; + for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { + if (op_flags[i] & OF_DELAY_OP) + op_flags[i] &= ~OF_BTARGET; + if (op_flags[i] & OF_BTARGET) { + if (branch_target_count < ARRAY_SIZE(branch_targets)) + branch_targets[branch_target_count++] = (struct linkage) { .pc = pc }; + else { + printf("warning: linkage overflow\n"); + end_pc = pc; + break; + } + } + if (ops[i].op == OP_LDC && (ops[i].dest & BITMASK1(SHR_SR)) && pc+2 < end_pc) + op_flags[i+1] |= OF_BTARGET; // RTE entrypoint in case of SR.IMASK change + // unify T and SR since rcache doesn't know about "virtual" guest regs + if (ops[i].source & BITMASK1(SHR_T)) ops[i].source |= BITMASK1(SHR_SR); + if (ops[i].dest & BITMASK1(SHR_T)) ops[i].source |= BITMASK1(SHR_SR); + if (ops[i].dest & BITMASK1(SHR_T)) ops[i].dest |= BITMASK1(SHR_SR); +#if LOOP_DETECTION + // loop types detected: + // 1. target: ... BRA target -> idle loop + // 2. target: ... delay insn ... BF target -> delay loop + // 3. target: ... poll insn ... BF/BT target -> poll loop + // 4. target: ... poll insn ... BF/BT exit ... BRA target, exit: -> poll + // conditions: + // a. no further branch targets between target and back jump. + // b. no unconditional branch insn inside the loop. + // c. exactly one poll or delay insn is allowed inside a delay/poll loop + // (scan_block marks loops only if they meet conditions a through c) + // d. idle loops do not modify anything but PC,SR and contain no branches + // e. delay/poll loops do not modify anything but the concerned reg,PC,SR + // f. 
loading constants into registers inside the loop is allowed + // g. a delay/poll loop must have a conditional branch somewhere + // h. an idle loop must not have a conditional branch + if (op_flags[i] & OF_BTARGET) { + // possible loop entry point + drcf.loop_type = op_flags[i] & OF_LOOP; + drcf.pending_branch_direct = drcf.pending_branch_indirect = 0; + op = OF_IDLE_LOOP; // loop type + v = i; + m1 = m2 = m3 = m4 = 0; + if (!drcf.loop_type) // reset basic loop it it isn't recognized as loop + op_flags[i] &= ~OF_BASIC_LOOP; + } + if (drcf.loop_type) { + // calculate reg masks for loop pinning + m4 |= ops[i].source & ~m3; + m3 |= ops[i].dest; + // detect loop type, and store poll/delay register + if (op_flags[i] & OF_POLL_INSN) { + op = OF_POLL_LOOP; + m1 |= ops[i].dest; // loop poll/delay regs + } else if (op_flags[i] & OF_DELAY_INSN) { + op = OF_DELAY_LOOP; + m1 |= ops[i].dest; + } else if (ops[i].op != OP_LOAD_POOL && ops[i].op != OP_LOAD_CONST + && (ops[i].op != OP_MOVE || op != OF_POLL_LOOP)) { + // not (MOV @(PC) or MOV # or (MOV reg and poll)), condition f + m2 |= ops[i].dest; // regs modified by other insns + } + // branch detector + if (OP_ISBRAIMM(ops[i].op)) { + if (ops[i].imm == base_pc + 2*v) + drcf.pending_branch_direct = 1; // backward branch detected + else + op_flags[v] &= ~OF_BASIC_LOOP; // no basic loop + } + if (OP_ISBRACND(ops[i].op)) + drcf.pending_branch_indirect = 1; // conditions g,h - cond.branch + // poll/idle loops terminate with their backwards branch to the loop start + if (drcf.pending_branch_direct && !(op_flags[i+1] & OF_DELAY_OP)) { + m2 &= ~(m1 | BITMASK3(SHR_PC, SHR_SR, SHR_T)); // conditions d,e + g,h + if (m2 || ((op == OF_IDLE_LOOP) == (drcf.pending_branch_indirect))) + op = 0; // conditions not met + op_flags[v] = (op_flags[v] & ~OF_LOOP) | op; // set loop type + drcf.loop_type = 0; +#if LOOP_OPTIMIZER + if (op_flags[v] & OF_BASIC_LOOP) { + m3 &= ~rcache_regs_static & ~BITMASK5(SHR_PC, SHR_PR, SHR_SR, SHR_T, SHR_MEM); + if (m3 
&& count_bits(m3) < count_bits(rcache_vregs_reg) && + pinned_loop_count < ARRAY_SIZE(pinned_loops)-1) { + pinned_loops[pinned_loop_count++] = + (struct linkage) { .pc = base_pc + 2*v, .mask = m3 }; + } else + op_flags[v] &= ~OF_BASIC_LOOP; + } +#endif + } + } +#endif + } - if (drcf.literals_disabled) - end_literals = end_pc; + tcache_ptr = dr_prepare_cache(tcache_id, (end_pc - base_pc) / 2, branch_target_count); +#if (DRC_DEBUG & 4) + tcache_dsm_ptrs[tcache_id] = tcache_ptr; +#endif - block = dr_add_block(base_pc, end_literals - base_pc, - end_pc - base_pc, sh2->is_slave, &blkid_main); + block = dr_add_block(branch_target_count, base_pc, end_pc - base_pc, + base_literals, end_literals-base_literals, crc, sh2->is_slave, &blkid_main); if (block == NULL) return NULL; block_entry_ptr = tcache_ptr; - dbg(2, "== %csh2 block #%d,%d %08x-%08x -> %p", sh2->is_slave ? 's' : 'm', - tcache_id, blkid_main, base_pc, end_pc, block_entry_ptr); + dbg(2, "== %csh2 block #%d,%d %08x-%08x,%08x-%08x -> %p", sh2->is_slave ? 
's' : 'm', + tcache_id, blkid_main, base_pc, end_pc, base_literals, end_literals, block_entry_ptr); - dr_link_blocks(&block->entryp[0], tcache_id); - - // collect branch_targets that don't land on delay slots - for (pc = base_pc, i = 0; pc < end_pc; i++, pc += 2) { - if (!(op_flags[i] & OF_BTARGET)) - continue; - if (op_flags[i] & OF_DELAY_OP) { - op_flags[i] &= ~OF_BTARGET; - continue; - } - ADD_TO_ARRAY(branch_target_pc, branch_target_count, pc, break); - } - - if (branch_target_count > 0) { - memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); - } // clear stale state after compile errors rcache_invalidate(); + emith_invalidate_t(); + drcf = (struct drcf) { 0 }; +#if LOOP_OPTIMIZER + pinned_loops[pinned_loop_count].pc = -1; + pinned_loop_count = 0; +#endif // ------------------------------------------------- // 3rd pass: actual compilation @@ -1454,90 +3464,217 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) for (i = 0; pc < end_pc; i++) { u32 delay_dep_fw = 0, delay_dep_bk = 0; - u32 tmp3, tmp4, sr; + int tmp3, tmp4; + int sr; - opd = &ops[i]; - op = FETCH_OP(pc); - -#if (DRC_DEBUG & 2) - insns_compiled++; -#endif -#if (DRC_DEBUG & 4) - DasmSH2(sh2dasm_buff, pc, op); - printf("%c%08x %04x %s\n", (op_flags[i] & OF_BTARGET) ? 
'*' : ' ', - pc, op, sh2dasm_buff); -#endif - - if ((op_flags[i] & OF_BTARGET) || pc == base_pc) + if (op_flags[i] & OF_BTARGET) { if (pc != base_pc) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); + emith_sync_t(sr); + drcf.Mflag = FLG_UNKNOWN; rcache_flush(); - - // make block entry - v = block->entry_count; - if (v < ARRAY_SIZE(block->entryp)) { - block->entryp[v].pc = pc; - block->entryp[v].tcache_ptr = tcache_ptr; - block->entryp[v].links = NULL; -#if (DRC_DEBUG & 2) - block->entryp[v].block = block; -#endif - add_to_hashlist(&block->entryp[v], tcache_id); - block->entry_count++; - - dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p", - sh2->is_slave ? 's' : 'm', tcache_id, blkid_main, - pc, tcache_ptr); - - // since we made a block entry, link any other blocks - // that jump to current pc - dr_link_blocks(&block->entryp[v], tcache_id); - } - else { - dbg(1, "too many entryp for block #%d,%d pc=%08x", - tcache_id, blkid_main, pc); - } - - do_host_disasm(tcache_id); + emith_flush(); } - v = find_in_array(branch_target_pc, branch_target_count, pc); - if (v >= 0) - branch_target_ptr[v] = tcache_ptr; + // make block entry + v = block->entry_count; + entry = &block->entryp[v]; + if (v < branch_target_count) + { + entry = &block->entryp[v]; + entry->pc = pc; + entry->tcache_ptr = tcache_ptr; + entry->links = entry->o_links = NULL; +#if (DRC_DEBUG & 2) + entry->block = block; +#endif + block->entry_count++; + + dbg(2, "-- %csh2 block #%d,%d entry %08x -> %p", + sh2->is_slave ? 's' : 'm', tcache_id, blkid_main, + pc, tcache_ptr); + } + else { + dbg(1, "too many entryp for block #%d,%d pc=%08x", + tcache_id, blkid_main, pc); + break; + } + + v = find_in_sorted_linkage(branch_targets, branch_target_count, pc); + if (v >= 0) + branch_targets[v].ptr = tcache_ptr; +#if LOOP_DETECTION + drcf.loop_type = op_flags[i] & OF_LOOP; + drcf.delay_reg = -1; + drcf.polling = (drcf.loop_type == OF_POLL_LOOP ? 
MF_POLLING : 0); +#endif - // must update PC - emit_move_r_imm32(SHR_PC, pc); rcache_clean(); +#if (DRC_DEBUG & 0x10) + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, pc); + tmp = emit_memhandler_read(1); + tmp2 = rcache_get_tmp(); + tmp3 = rcache_get_tmp(); + emith_move_r_imm(tmp2, (s16)FETCH_OP(pc)); + emith_move_r_imm(tmp3, 0); + emith_cmp_r_r(tmp, tmp2); + EMITH_SJMP_START(DCOND_EQ); + emith_read_r_r_offs_c(DCOND_NE, tmp3, tmp3, 0); // crash + EMITH_SJMP_END(DCOND_EQ); + rcache_free_tmp(tmp); + rcache_free_tmp(tmp2); + rcache_free_tmp(tmp3); +#endif + // check cycles - sr = rcache_get_reg(SHR_SR, RC_GR_READ); - emith_cmp_r_imm(sr, 0); - emith_jump_cond(DCOND_LE, sh2_drc_exit); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + +#if LOOP_OPTIMIZER + if (op_flags[i] & OF_BASIC_LOOP) { + if (pinned_loops[pinned_loop_count].pc == pc) { + // pin needed regs on loop entry + FOR_ALL_BITS_SET_DO(pinned_loops[pinned_loop_count].mask, v, rcache_pin_reg(v)); + emith_flush(); + // store current PC as loop target + pinned_loops[pinned_loop_count].ptr = tcache_ptr; + drcf.pinning = 1; + } else + op_flags[i] &= ~OF_BASIC_LOOP; + } + + if (op_flags[i] & OF_BASIC_LOOP) { + // if exiting a pinned loop pinned regs must be written back to ctx + // since they are reloaded in the loop entry code + emith_cmp_r_imm(sr, 0); + EMITH_JMP_START(DCOND_GE); + rcache_save_pinned(); + + if (blx_target_count < ARRAY_SIZE(blx_targets)) { + // exit via stub in blx table (saves some 1-3 insns in the main flow) + blx_targets[blx_target_count++] = + (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 }; + emith_jump_patchable(tcache_ptr); + } else { + // blx table full, must inline exit code + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, pc); + emith_jump(sh2_drc_exit); + rcache_free_tmp(tmp); + } + EMITH_JMP_END(DCOND_GE); + } else +#endif + { + if (blx_target_count < ARRAY_SIZE(blx_targets)) { + // exit via stub in blx table (saves some 1-3 insns in the main flow) + 
emith_cmp_r_imm(sr, 0); + blx_targets[blx_target_count++] = + (struct linkage) { .pc = pc, .ptr = tcache_ptr, .mask = 0x1 }; + emith_jump_cond_patchable(DCOND_LT, tcache_ptr); + } else { + // blx table full, must inline exit code + tmp = rcache_get_tmp_arg(0); + emith_cmp_r_imm(sr, 0); + EMITH_SJMP_START(DCOND_GE); + emith_move_r_imm_c(DCOND_LT, tmp, pc); + emith_jump_cond(DCOND_LT, sh2_drc_exit); + EMITH_SJMP_END(DCOND_GE); + rcache_free_tmp(tmp); + } + } + +#if (DRC_DEBUG & 32) + // block hit counter + tmp = rcache_get_tmp_arg(0); + tmp2 = rcache_get_tmp_arg(1); + emith_move_r_ptr_imm(tmp, (uptr)entry); + emith_read_r_r_offs(tmp2, tmp, offsetof(struct block_entry, entry_count)); + emith_add_r_imm(tmp2, 1); + emith_write_r_r_offs(tmp2, tmp, offsetof(struct block_entry, entry_count)); + rcache_free_tmp(tmp); + rcache_free_tmp(tmp2); +#endif + +#if (DRC_DEBUG & (8|256|512|1024)) + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); + rcache_clean(); + tmp = rcache_used_hregs_mask(); + emith_save_caller_regs(tmp); + emit_do_static_regs(1, 0); + rcache_get_reg_arg(2, SHR_SR, NULL); + tmp2 = rcache_get_tmp_arg(0); + tmp3 = rcache_get_tmp_arg(1); + tmp4 = rcache_get_tmp(); + emith_move_r_ptr_imm(tmp2, tcache_ptr); + emith_move_r_r_ptr(tmp3, CONTEXT_REG); + emith_move_r_imm(tmp4, pc); + emith_ctx_write(tmp4, SHR_PC * 4); + rcache_invalidate_tmp(); + emith_abicall(sh2_drc_log_entry); + emith_restore_caller_regs(tmp); +#endif + do_host_disasm(tcache_id); rcache_unlock_all(); } #ifdef DRC_CMP if (!(op_flags[i] & OF_DELAY_OP)) { - emit_move_r_imm32(SHR_PC, pc); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); + emith_sync_t(sr); + emit_move_r_imm32(SHR_PC, pc); rcache_clean(); - tmp = rcache_used_hreg_mask(); + tmp = rcache_used_hregs_mask(); emith_save_caller_regs(tmp); emit_do_static_regs(1, 0); emith_pass_arg_r(0, CONTEXT_REG); - emith_call(do_sh2_cmp); + emith_abicall(do_sh2_cmp); 
emith_restore_caller_regs(tmp); } #endif - pc += 2; + // emit blx area if limits are approached + if (blx_target_count && (blx_target_count > ARRAY_SIZE(blx_targets)-4 || + !emith_jump_patch_inrange(blx_targets[0].ptr, tcache_ptr+0x100))) { + u8 *jp; + rcache_invalidate_tmp(); + jp = tcache_ptr; + emith_jump_patchable(tcache_ptr); + emit_branch_linkage_code(sh2, block, tcache_id, branch_targets, + branch_target_count, blx_targets, blx_target_count); + blx_target_count = 0; + do_host_disasm(tcache_id); + emith_jump_patch(jp, tcache_ptr, NULL); + } + emith_pool_check(); + + opd = &ops[i]; + op = FETCH_OP(pc); +#if (DRC_DEBUG & 4) + DasmSH2(sh2dasm_buff, pc, op); + if (op_flags[i] & OF_BTARGET) { + if ((op_flags[i] & OF_LOOP) == OF_DELAY_LOOP) tmp3 = '+'; + else if ((op_flags[i] & OF_LOOP) == OF_POLL_LOOP) tmp3 = '='; + else if ((op_flags[i] & OF_LOOP) == OF_IDLE_LOOP) tmp3 = '~'; + else tmp3 = '*'; + } else if (drcf.loop_type) tmp3 = '.'; + else tmp3 = ' '; + printf("%c%08lx %04x %s\n", tmp3, (ulong)pc, op, sh2dasm_buff); +#endif + + pc += 2; +#if (DRC_DEBUG & 2) + insns_compiled++; +#endif if (skip_op > 0) { skip_op--; continue; @@ -1549,7 +3686,8 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) delay_dep_fw = opd->dest & ops[i-1].source; delay_dep_bk = opd->source & ops[i-1].dest; if (delay_dep_fw & BITMASK1(SHR_T)) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); DELAY_SAVE_T(sr); } if (delay_dep_bk & BITMASK1(SHR_PC)) { @@ -1558,8 +3696,9 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) elprintf_sh2(sh2, EL_ANOMALY, "drc: illegal slot insn %04x @ %08x?", op, pc - 2); } + // store PC for MOVA/MOV @PC address calculation if (opd->imm != 0) - ; // addr already resolved somehow + ; // case OP_BRANCH - addr already resolved in scan_block else { switch (ops[i-1].op) { case OP_BRANCH: @@ -1567,12 +3706,18 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) 
break; case OP_BRANCH_CT: case OP_BRANCH_CF: - tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE); - sr = rcache_get_reg(SHR_SR, RC_GR_READ); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL); emith_move_r_imm(tmp, pc); - emith_tst_r_imm(sr, T); - tmp2 = ops[i-1].op == OP_BRANCH_CT ? DCOND_NE : DCOND_EQ; + tmp2 = emith_tst_t(sr, (ops[i-1].op == OP_BRANCH_CT)); + tmp3 = emith_invert_cond(tmp2); + EMITH_SJMP_START(tmp3); emith_move_r_imm_c(tmp2, tmp, ops[i-1].imm); + EMITH_SJMP_END(tmp3); + break; + case OP_BRANCH_N: // BT/BF known not to be taken + // XXX could modify opd->imm instead? + emit_move_r_imm32(SHR_PC, pc); break; // case OP_BRANCH_R OP_BRANCH_RF - PC already loaded } @@ -1584,8 +3729,38 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) dbg(1, "unhandled delay_dep_bk: %x", delay_dep_bk); } + // inform cache about future register usage + u32 late = 0; // regs read by future ops + u32 write = 0; // regs written to (to detect write before read) + u32 soon = 0; // regs read soon + for (v = 1; v <= 9; v++) { + // no sense in looking any further than the next rcache flush + tmp = ((op_flags[i+v] & OF_BTARGET) || (op_flags[i+v-1] & OF_DELAY_OP) || + (OP_ISBRACND(opd[v-1].op) && !(op_flags[i+v] & OF_DELAY_OP))); + // XXX looking behind cond branch to avoid evicting regs used later? 
+ if (pc + 2*v <= end_pc && !tmp) { // (pc already incremented above) + late |= opd[v].source & ~write; + // ignore source regs after they have been written to + write |= opd[v].dest; + // regs needed in the next few instructions + if (v <= 4) + soon = late; + } else + break; + } + rcache_set_usage_now(opd[0].source); // current insn + rcache_set_usage_soon(soon); // insns 1-4 + rcache_set_usage_late(late & ~soon); // insns 5-9 + rcache_set_usage_discard(write & ~(late|soon)); + if (v <= 9) + // upcoming rcache_flush, start writing back unused dirty stuff + rcache_clean_masked(rcache_dirty_mask() & ~(write|opd[0].dest)); + switch (opd->op) { + case OP_BRANCH_N: + // never taken, just use up cycles + goto end_op; case OP_BRANCH: case OP_BRANCH_CT: case OP_BRANCH_CF: @@ -1602,61 +3777,84 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto end_op; case OP_BRANCH_RF: - tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp = rcache_get_reg(SHR_PC, RC_GR_WRITE, NULL); + emith_move_r_imm(tmp, pc + 2); if (opd->dest & BITMASK1(SHR_PR)) { - tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE); - emith_move_r_imm(tmp3, pc + 2); - emith_add_r_r_r(tmp, tmp2, tmp3); - } - else { - emith_move_r_r(tmp, tmp2); - emith_add_r_imm(tmp, pc + 2); + tmp3 = rcache_get_reg(SHR_PR, RC_GR_WRITE, NULL); + emith_move_r_r(tmp3, tmp); } + emith_add_r_r(tmp, tmp2); + if (gconst_get(GET_Rn(), &u)) + gconst_set(SHR_PC, pc + 2 + u); drcf.pending_branch_indirect = 1; goto end_op; - case OP_SLEEP: + case OP_SLEEP: // SLEEP 0000000000011011 printf("TODO sleep\n"); goto end_op; - case OP_RTE: + case OP_RTE: // RTE 0000000000101011 + emith_invalidate_t(); // pop PC - emit_memhandler_read_rr(SHR_PC, SHR_SP, 0, 2); + tmp = emit_memhandler_read_rr(sh2, SHR_PC, SHR_SP, 0, 2 | MF_POSTINCR); + rcache_free(tmp); // pop SR - tmp = rcache_get_reg_arg(0, SHR_SP); - emith_add_r_imm(tmp, 4); - tmp = 
emit_memhandler_read(2); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp = emit_memhandler_read_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_POSTINCR); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_write_sr(sr, tmp); rcache_free_tmp(tmp); - tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); - emith_add_r_imm(tmp, 4*2); drcf.test_irq = 1; drcf.pending_branch_indirect = 1; goto end_op; + case OP_UNDEFINED: + elprintf_sh2(sh2, EL_ANOMALY, "drc: unhandled op %04x @ %08x", op, pc-2); + opd->imm = (op_flags[i] & OF_B_IN_DS) ? 6 : 4; + // fallthrough + case OP_TRAPA: // TRAPA #imm 11000011iiiiiiii + // push SR + tmp = rcache_get_reg_arg(1, SHR_SR, &tmp2); + emith_sync_t(tmp2); + emith_clear_msb(tmp, tmp2, 22); + emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR); + // push PC + if (opd->op == OP_TRAPA) { + tmp = rcache_get_tmp_arg(1); + emith_move_r_imm(tmp, pc); + } else if (drcf.pending_branch_indirect) { + tmp = rcache_get_reg_arg(1, SHR_PC, NULL); + } else { + tmp = rcache_get_tmp_arg(1); + emith_move_r_imm(tmp, pc - 2); + } + emit_memhandler_write_rr(sh2, SHR_TMP, SHR_SP, 0, 2 | MF_PREDECR); + // obtain new PC + emit_memhandler_read_rr(sh2, SHR_PC, SHR_VBR, opd->imm * 4, 2); + // indirect jump -> back to dispatcher + drcf.pending_branch_indirect = 1; + goto end_op; + case OP_LOAD_POOL: #if PROPAGATE_CONSTANTS - if (opd->imm != 0 && opd->imm < end_literals - && literal_addr_count < MAX_LITERALS) + if ((opd->imm && opd->imm >= base_pc && opd->imm < end_literals) || + p32x_sh2_mem_is_rom(opd->imm, sh2)) { - ADD_TO_ARRAY(literal_addr, literal_addr_count, opd->imm,); if (opd->size == 2) - tmp = FETCH32(opd->imm); + u = FETCH32(opd->imm); else - tmp = (u32)(int)(signed short)FETCH_OP(opd->imm); - gconst_new(GET_Rn(), tmp); + u = (s16)FETCH_OP(opd->imm); + gconst_new(GET_Rn(), u); } else #endif { - tmp = rcache_get_tmp_arg(0); - if (opd->imm != 0) + if (opd->imm != 0) { + tmp = rcache_get_tmp_arg(0); emith_move_r_imm(tmp, opd->imm); - else { - // have to calculate read 
addr from PC - tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); + } else { + // have to calculate read addr from PC for delay slot + tmp = rcache_get_reg_arg(0, SHR_PC, &tmp2); if (opd->size == 2) { emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4); emith_bic_r_imm(tmp, 3); @@ -1665,21 +3863,21 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 2); } tmp2 = emit_memhandler_read(opd->size); - tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); - if (opd->size == 2) + tmp3 = rcache_map_reg(GET_Rn(), tmp2); + if (tmp3 != tmp2) { emith_move_r_r(tmp3, tmp2); - else - emith_sext(tmp3, tmp2, 16); - rcache_free_tmp(tmp2); + rcache_free_tmp(tmp2); + } } goto end_op; - case OP_MOVA: + case OP_MOVA: // MOVA @(disp,PC),R0 11000111dddddddd if (opd->imm != 0) emit_move_r_imm32(SHR_R0, opd->imm); else { - tmp = rcache_get_reg(SHR_R0, RC_GR_WRITE); - tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ); + // have to calculate addr from PC for delay slot + tmp2 = rcache_get_reg(SHR_PC, RC_GR_READ, NULL); + tmp = rcache_get_reg(SHR_R0, RC_GR_WRITE, NULL); emith_add_r_r_imm(tmp, tmp2, 2 + (op & 0xff) * 4); emith_bic_r_imm(tmp, 3); } @@ -1693,7 +3891,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) switch (op & 0x0f) { case 0x02: - tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE); switch (GET_Fx()) { case 0: // STC SR,Rn 0000nnnn00000010 @@ -1708,38 +3905,43 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: goto default_; } - tmp3 = rcache_get_reg(tmp2, RC_GR_READ); - emith_move_r_r(tmp, tmp3); - if (tmp2 == SHR_SR) - emith_clear_msb(tmp, tmp, 22); // reserved bits defined by ISA as 0 + if (tmp2 == SHR_SR) { + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + emith_sync_t(sr); + tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); + emith_clear_msb(tmp, sr, 22); // reserved bits defined by ISA as 0 + } else + emit_move_r_r(GET_Rn(), tmp2); goto end_op; case 0x04: // MOV.B Rm,@(R0,Rn) 0000nnnnmmmm0100 case 0x05: 
// MOV.W Rm,@(R0,Rn) 0000nnnnmmmm0101 case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110 - rcache_clean(); - tmp = rcache_get_reg_arg(1, GET_Rm()); - tmp2 = rcache_get_reg_arg(0, SHR_R0); - tmp3 = rcache_get_reg(GET_Rn(), RC_GR_READ); - emith_add_r_r(tmp2, tmp3); - emit_memhandler_write(op & 3); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + emit_indirect_indexed_write(sh2, GET_Rm(), SHR_R0, GET_Rn(), op & 3); goto end_op; - case 0x07: - // MUL.L Rm,Rn 0000nnnnmmmm0111 - tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE); + case 0x07: // MUL.L Rm,Rn 0000nnnnmmmm0111 + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); emith_mul(tmp3, tmp2, tmp); goto end_op; case 0x08: switch (GET_Fx()) { case 0: // CLRT 0000000000001000 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_bic_r_imm(sr, T); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); +#if T_OPTIMIZER + if (~rcache_regs_discard & BITMASK1(SHR_T)) +#endif + emith_set_t(sr, 0); break; case 1: // SETT 0000000000011000 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_or_r_imm(sr, T); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); +#if T_OPTIMIZER + if (~rcache_regs_discard & BITMASK1(SHR_T)) +#endif + emith_set_t(sr, 1); break; case 2: // CLRMAC 0000000000101000 emit_move_r_imm32(SHR_MACL, 0); @@ -1755,12 +3957,66 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0: // NOP 0000000000001001 break; case 1: // DIV0U 0000000000011001 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_invalidate_t(); emith_bic_r_imm(sr, M|Q|T); + drcf.Mflag = FLG_0; +#if DIV_OPTIMIZER + if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) { + // divide 32/16 + rcache_get_reg_arg(0, div(opd).rn, NULL); + rcache_get_reg_arg(2, div(opd).rm, NULL); + tmp = 
rcache_get_tmp_arg(1); + emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp)); + rcache_invalidate_tmp(); + emith_abicall(sh2_drc_divu32); + tmp = rcache_get_tmp_ret(); + tmp2 = rcache_map_reg(div(opd).rn, tmp); + if (tmp != tmp2) + emith_move_r_r(tmp2, tmp); + + tmp3 = rcache_get_tmp(); + emith_and_r_r_imm(tmp3, tmp2, 1); // Q = !Rn[0] + emith_eor_r_r_imm(tmp3, tmp3, 1); + emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); + emith_ctx_read(tmp3, offsetof(SH2, drc_tmp)); + emith_or_r_r_r(sr, sr, tmp3); // T + rcache_free_tmp(tmp3); + skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; + } + else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) { + // divide 64/32 + tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL); + emith_ctx_write(tmp4, offsetof(SH2, drc_tmp)); + rcache_free(tmp4); + rcache_get_reg_arg(0, div(opd).rn, NULL); + rcache_get_reg_arg(2, div(opd).rm, NULL); + tmp = rcache_get_tmp_arg(1); + emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp)); + rcache_invalidate_tmp(); + emith_abicall(sh2_drc_divu64); + tmp = rcache_get_tmp_ret(); + tmp2 = rcache_map_reg(div(opd).rn, tmp); + tmp4 = rcache_get_reg(div(opd).ro, RC_GR_WRITE, NULL); + if (tmp != tmp2) + emith_move_r_r(tmp2, tmp); + emith_ctx_read(tmp4, offsetof(SH2, drc_tmp)); + + tmp3 = rcache_get_tmp(); + emith_and_r_r_imm(tmp3, tmp4, 1); // Q = !Ro[0] + emith_eor_r_r_imm(tmp3, tmp3, 1); + emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); + rcache_free_tmp(tmp3); + skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; + } +#endif break; case 2: // MOVT Rn 0000nnnn00101001 - sr = rcache_get_reg(SHR_SR, RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + emith_sync_t(sr); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); emith_clear_msb(tmp2, sr, 31); break; default: @@ -1768,7 +4024,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } goto end_op; case 0x0a: - tmp = rcache_get_reg(GET_Rn(), 
RC_GR_WRITE); switch (GET_Fx()) { case 0: // STS MACH,Rn 0000nnnn00001010 @@ -1783,64 +4038,32 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: goto default_; } - tmp2 = rcache_get_reg(tmp2, RC_GR_READ); - emith_move_r_r(tmp, tmp2); + emit_move_r_r(GET_Rn(), tmp2); goto end_op; case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100 case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101 case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110 - tmp = emit_indirect_indexed_read(SHR_R0, GET_Rm(), op & 3); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); - if ((op & 3) != 2) { - emith_sext(tmp2, tmp, (op & 1) ? 16 : 8); - } else - emith_move_r_r(tmp2, tmp); - rcache_free_tmp(tmp); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + emit_indirect_indexed_read(sh2, GET_Rn(), SHR_R0, GET_Rm(), (op & 3) | drcf.polling); goto end_op; case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111 - emit_indirect_read_double(&tmp, &tmp2, GET_Rn(), GET_Rm(), 2); - tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW); - /* MS 16 MAC bits unused if saturated */ - sr = rcache_get_reg(SHR_SR, RC_GR_READ); - emith_tst_r_imm(sr, S); - EMITH_SJMP_START(DCOND_EQ); - emith_clear_msb_c(DCOND_NE, tmp4, tmp4, 16); - EMITH_SJMP_END(DCOND_EQ); - rcache_unlock(sr); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW); // might evict SR - emith_mula_s64(tmp3, tmp4, tmp, tmp2); + emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 2); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL); + tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW, NULL); + emith_sh2_macl(tmp3, tmp4, tmp, tmp2, sr); rcache_free_tmp(tmp2); - sr = rcache_get_reg(SHR_SR, RC_GR_READ); // reget just in case - emith_tst_r_imm(sr, S); - - EMITH_JMP_START(DCOND_EQ); - emith_asr(tmp, tmp4, 15); - emith_cmp_r_imm(tmp, -1); // negative overflow (0x80000000..0xffff7fff) - EMITH_SJMP_START(DCOND_GE); - emith_move_r_imm_c(DCOND_LT, tmp4, 0x8000); - emith_move_r_imm_c(DCOND_LT, tmp3, 0x0000); - 
EMITH_SJMP_END(DCOND_GE); - emith_cmp_r_imm(tmp, 0); // positive overflow (0x00008000..0x7fffffff) - EMITH_SJMP_START(DCOND_LE); - emith_move_r_imm_c(DCOND_GT, tmp4, 0x00007fff); - emith_move_r_imm_c(DCOND_GT, tmp3, 0xffffffff); - EMITH_SJMP_END(DCOND_LE); - EMITH_JMP_END(DCOND_EQ); - rcache_free_tmp(tmp); goto end_op; } goto default_; ///////////////////////////////////////////// - case 0x01: - // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd - rcache_clean(); - tmp = rcache_get_reg_arg(0, GET_Rn()); - tmp2 = rcache_get_reg_arg(1, GET_Rm()); - if (op & 0x0f) - emith_add_r_imm(tmp, (op & 0x0f) * 4); - emit_memhandler_write(2); + case 0x01: // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), (op & 0x0f) * 4, 2); goto end_op; case 0x02: @@ -1849,101 +4072,181 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x00: // MOV.B Rm,@Rn 0010nnnnmmmm0000 case 0x01: // MOV.W Rm,@Rn 0010nnnnmmmm0001 case 0x02: // MOV.L Rm,@Rn 0010nnnnmmmm0010 - rcache_clean(); - rcache_get_reg_arg(0, GET_Rn()); - rcache_get_reg_arg(1, GET_Rm()); - emit_memhandler_write(op & 3); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), 0, op & 3); goto end_op; case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100 case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101 case 0x06: // MOV.L Rm,@-Rn 0010nnnnmmmm0110 - rcache_get_reg_arg(1, GET_Rm()); // for Rm == Rn - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - emith_sub_r_imm(tmp, (1 << (op & 3))); - rcache_clean(); - rcache_get_reg_arg(0, GET_Rn()); - emit_memhandler_write(op & 3); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + emit_memhandler_write_rr(sh2, GET_Rm(), GET_Rn(), 0, (op & 3) | MF_PREDECR); goto end_op; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp3 = 
rcache_get_reg(GET_Rm(), RC_GR_READ); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_invalidate_t(); emith_bic_r_imm(sr, M|Q|T); - emith_tst_r_imm(tmp2, (1<<31)); - EMITH_SJMP_START(DCOND_EQ); - emith_or_r_imm_c(DCOND_NE, sr, Q); - EMITH_SJMP_END(DCOND_EQ); - emith_tst_r_imm(tmp3, (1<<31)); - EMITH_SJMP_START(DCOND_EQ); - emith_or_r_imm_c(DCOND_NE, sr, M); - EMITH_SJMP_END(DCOND_EQ); - emith_teq_r_r(tmp2, tmp3); - EMITH_SJMP_START(DCOND_PL); - emith_or_r_imm_c(DCOND_MI, sr, T); - EMITH_SJMP_END(DCOND_PL); + drcf.Mflag = FLG_UNKNOWN; +#if DIV_OPTIMIZER + if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) { + // divide 32/16 + tmp = rcache_get_reg_arg(0, div(opd).rn, NULL); + tmp2 = rcache_get_reg_arg(2, div(opd).rm, NULL); + tmp3 = rcache_get_tmp_arg(1); + emith_lsr(tmp3, tmp2, 31); + emith_or_r_r_lsl(sr, tmp3, M_SHIFT); // M = Rm[31] + emith_add_r_r_ptr_imm(tmp3, CONTEXT_REG, offsetof(SH2, drc_tmp)); + rcache_invalidate_tmp(); + emith_abicall(sh2_drc_divs32); + tmp = rcache_get_tmp_ret(); + tmp2 = rcache_map_reg(div(opd).rn, tmp); + if (tmp != tmp2) + emith_move_r_r(tmp2, tmp); + tmp3 = rcache_get_tmp(); + + emith_eor_r_r_r_lsr(tmp3, tmp2, sr, M_SHIFT); + emith_and_r_r_imm(tmp3, tmp3, 1); + emith_eor_r_r_imm(tmp3, tmp3, 1); + emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); // Q = !Rn[0]^M + emith_ctx_read(tmp3, offsetof(SH2, drc_tmp)); + emith_or_r_r_r(sr, sr, tmp3); // T + rcache_free_tmp(tmp3); + skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; + } + else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) { + // divide 64/32 + tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL); + emith_ctx_write(tmp4, offsetof(SH2, drc_tmp)); + rcache_free(tmp4); + tmp = rcache_get_reg_arg(0, div(opd).rn, NULL); + tmp2 = rcache_get_reg_arg(2, div(opd).rm, NULL); + tmp3 = rcache_get_tmp_arg(1); + emith_lsr(tmp3, tmp2, 31); + emith_or_r_r_lsl(sr, tmp3, M_SHIFT); // M = Rm[31] + emith_eor_r_r_lsr(tmp3, tmp, 31); + emith_or_r_r(sr, tmp3); // T = Rn[31]^M + 
emith_add_r_r_ptr_imm(tmp3, CONTEXT_REG, offsetof(SH2, drc_tmp)); + rcache_invalidate_tmp(); + emith_abicall(sh2_drc_divs64); + tmp = rcache_get_tmp_ret(); + tmp2 = rcache_map_reg(div(opd).rn, tmp); + tmp4 = rcache_get_reg(div(opd).ro, RC_GR_WRITE, NULL); + if (tmp != tmp2) + emith_move_r_r(tmp2, tmp); + emith_ctx_read(tmp4, offsetof(SH2, drc_tmp)); + + tmp3 = rcache_get_tmp(); + emith_eor_r_r_r_lsr(tmp3, tmp4, sr, M_SHIFT); + emith_and_r_r_imm(tmp3, tmp3, 1); + emith_eor_r_r_imm(tmp3, tmp3, 1); + emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); // Q = !Ro[0]^M + rcache_free_tmp(tmp3); + skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; + } else +#endif + { + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_tmp(); + emith_lsr(tmp, tmp2, 31); // Q = Nn + emith_or_r_r_lsl(sr, tmp, Q_SHIFT); + emith_lsr(tmp, tmp3, 31); // M = Nm + emith_or_r_r_lsl(sr, tmp, M_SHIFT); + emith_eor_r_r_lsr(tmp, tmp2, 31); + emith_or_r_r(sr, tmp); // T = Q^M + rcache_free(tmp); + } goto end_op; case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); - emith_bic_r_imm(sr, T); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + emith_clr_t_cond(sr); emith_tst_r_r(tmp2, tmp3); - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); goto end_op; case 0x09: // AND Rm,Rn 0010nnnnmmmm1001 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - emith_and_r_r(tmp, tmp2); + if (GET_Rm() != GET_Rn()) { + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + emith_and_r_r_r(tmp, tmp3, tmp2); + } goto end_op; case 0x0a: // XOR Rm,Rn 0010nnnnmmmm1010 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = 
rcache_get_reg(GET_Rm(), RC_GR_READ); - emith_eor_r_r(tmp, tmp2); +#if PROPAGATE_CONSTANTS + if (GET_Rn() == GET_Rm()) { + gconst_new(GET_Rn(), 0); + goto end_op; + } +#endif + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + emith_eor_r_r_r(tmp, tmp3, tmp2); goto end_op; case 0x0b: // OR Rm,Rn 0010nnnnmmmm1011 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - emith_or_r_r(tmp, tmp2); + if (GET_Rm() != GET_Rn()) { + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + emith_or_r_r_r(tmp, tmp3, tmp2); + } goto end_op; case 0x0c: // CMP/STR Rm,Rn 0010nnnnmmmm1100 tmp = rcache_get_tmp(); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); emith_eor_r_r_r(tmp, tmp2, tmp3); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_bic_r_imm(sr, T); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_clr_t_cond(sr); emith_tst_r_imm(tmp, 0x000000ff); - emit_or_t_if_eq(sr); - emith_tst_r_imm(tmp, 0x0000ff00); - emit_or_t_if_eq(sr); - emith_tst_r_imm(tmp, 0x00ff0000); - emit_or_t_if_eq(sr); - emith_tst_r_imm(tmp, 0xff000000); - emit_or_t_if_eq(sr); + EMITH_SJMP_START(DCOND_EQ); + emith_tst_r_imm_c(DCOND_NE, tmp, 0x0000ff00); + EMITH_SJMP_START(DCOND_EQ); + emith_tst_r_imm_c(DCOND_NE, tmp, 0x00ff0000); + EMITH_SJMP_START(DCOND_EQ); + emith_tst_r_imm_c(DCOND_NE, tmp, 0xff000000); + EMITH_SJMP_END(DCOND_EQ); + EMITH_SJMP_END(DCOND_EQ); + EMITH_SJMP_END(DCOND_EQ); + emith_set_t_cond(sr, DCOND_EQ); rcache_free_tmp(tmp); goto end_op; case 0x0d: // XTRCT Rm,Rn 0010nnnnmmmm1101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - emith_lsr(tmp, tmp, 16); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, 
&tmp3); + emith_lsr(tmp, tmp3, 16); emith_or_r_r_lsl(tmp, tmp2, 16); goto end_op; case 0x0e: // MULU.W Rm,Rn 0010nnnnmmmm1110 case 0x0f: // MULS.W Rm,Rn 0010nnnnmmmm1111 - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); + tmp4 = tmp3; if (op & 1) { - emith_sext(tmp, tmp2, 16); - } else - emith_clear_msb(tmp, tmp2, 16); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp2 = rcache_get_tmp(); - if (op & 1) { - emith_sext(tmp2, tmp3, 16); - } else - emith_clear_msb(tmp2, tmp3, 16); - emith_mul(tmp, tmp, tmp2); - rcache_free_tmp(tmp2); + if (! rcache_is_s16(tmp2)) { + emith_sext(tmp, tmp2, 16); + tmp2 = tmp; + } + if (! rcache_is_s16(tmp3)) { + tmp4 = rcache_get_tmp(); + emith_sext(tmp4, tmp3, 16); + } + } else { + if (! rcache_is_u16(tmp2)) { + emith_clear_msb(tmp, tmp2, 16); + tmp2 = tmp; + } + if (! 
rcache_is_u16(tmp3)) { + tmp4 = rcache_get_tmp(); + emith_clear_msb(tmp4, tmp3, 16); + } + } + emith_mul(tmp, tmp2, tmp4); + if (tmp4 != tmp3) + rcache_free_tmp(tmp4); goto end_op; } goto default_; @@ -1957,37 +4260,30 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x03: // CMP/GE Rm,Rn 0011nnnnmmmm0011 case 0x06: // CMP/HI Rm,Rn 0011nnnnmmmm0110 case 0x07: // CMP/GT Rm,Rn 0011nnnnmmmm0111 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); - emith_bic_r_imm(sr, T); - emith_cmp_r_r(tmp2, tmp3); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); switch (op & 0x07) { case 0x00: // CMP/EQ - emit_or_t_if_eq(sr); + tmp = DCOND_EQ; break; case 0x02: // CMP/HS - EMITH_SJMP_START(DCOND_LO); - emith_or_r_imm_c(DCOND_HS, sr, T); - EMITH_SJMP_END(DCOND_LO); + tmp = DCOND_HS; break; case 0x03: // CMP/GE - EMITH_SJMP_START(DCOND_LT); - emith_or_r_imm_c(DCOND_GE, sr, T); - EMITH_SJMP_END(DCOND_LT); + tmp = DCOND_GE; break; case 0x06: // CMP/HI - EMITH_SJMP_START(DCOND_LS); - emith_or_r_imm_c(DCOND_HI, sr, T); - EMITH_SJMP_END(DCOND_LS); + tmp = DCOND_HI; break; case 0x07: // CMP/GT - EMITH_SJMP_START(DCOND_LE); - emith_or_r_imm_c(DCOND_GT, sr, T); - EMITH_SJMP_END(DCOND_LE); + tmp = DCOND_GT; break; } + emith_clr_t_cond(sr); + emith_cmp_r_r(tmp2, tmp3); + emith_set_t_cond(sr, tmp); goto end_op; case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100 // Q1 = carry(Rn = (Rn << 1) | T) @@ -1997,80 +4293,109 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // Q2 = carry(Rn -= Rm) // Q = M ^ Q1 ^ Q2 // T = (Q == M) = !(Q ^ M) = !(Q1 ^ Q2) - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_tpop_carry(sr, 0); - emith_adcf_r_r(tmp2, tmp2); - emith_tpush_carry(sr, 0); // keep Q1 in T for 
now - tmp4 = rcache_get_tmp(); - emith_and_r_r_imm(tmp4, sr, M); - emith_eor_r_r_lsr(sr, tmp4, M_SHIFT - Q_SHIFT); // Q ^= M - rcache_free_tmp(tmp4); - // add or sub, invert T if carry to get Q1 ^ Q2 - // in: (Q ^ M) passed in Q, Q1 in T + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); + tmp = rcache_get_tmp(); + if (drcf.Mflag != FLG_0) { + emith_and_r_r_imm(tmp, sr, M); + emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); // Q ^= M + } + rcache_free_tmp(tmp); + // shift Rn, add T, add or sub Rm, set T = !(Q1 ^ Q2) + // in: (Q ^ M) passed in Q emith_sh2_div1_step(tmp2, tmp3, sr); - emith_bic_r_imm(sr, Q); - emith_tst_r_imm(sr, M); - EMITH_SJMP_START(DCOND_EQ); - emith_or_r_imm_c(DCOND_NE, sr, Q); // Q = M - EMITH_SJMP_END(DCOND_EQ); - emith_tst_r_imm(sr, T); - EMITH_SJMP_START(DCOND_EQ); - emith_eor_r_imm_c(DCOND_NE, sr, Q); // Q = M ^ Q1 ^ Q2 - EMITH_SJMP_END(DCOND_EQ); - emith_eor_r_imm(sr, T); // T = !(Q1 ^ Q2) + tmp = rcache_get_tmp(); + emith_or_r_imm(sr, Q); // Q = !T + emith_and_r_r_imm(tmp, sr, T); + emith_eor_r_r_lsl(sr, tmp, Q_SHIFT); + if (drcf.Mflag != FLG_0) { // Q = M ^ !T = M ^ Q1 ^ Q2 + emith_and_r_r_imm(tmp, sr, M); + emith_eor_r_r_lsr(sr, tmp, M_SHIFT - Q_SHIFT); + } + rcache_free_tmp(tmp); goto end_op; case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE); - tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); + tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE, NULL); emith_mul_u64(tmp3, tmp4, tmp, tmp2); goto end_op; case 0x08: // SUB Rm,Rn 0011nnnnmmmm1000 +#if PROPAGATE_CONSTANTS + if (GET_Rn() == GET_Rm()) { + gconst_new(GET_Rn(), 0); + goto end_op; + 
} +#endif case 0x0c: // ADD Rm,Rn 0011nnnnmmmm1100 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); if (op & 4) { - emith_add_r_r(tmp, tmp2); + emith_add_r_r_r(tmp, tmp3, tmp2); } else - emith_sub_r_r(tmp, tmp2); + emith_sub_r_r_r(tmp, tmp3, tmp2); goto end_op; case 0x0a: // SUBC Rm,Rn 0011nnnnmmmm1010 case 0x0e: // ADDC Rm,Rn 0011nnnnmmmm1110 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - if (op & 4) { // adc - emith_tpop_carry(sr, 0); - emith_adcf_r_r(tmp, tmp2); - emith_tpush_carry(sr, 0); - } else { - emith_tpop_carry(sr, 1); - emith_sbcf_r_r(tmp, tmp2); - emith_tpush_carry(sr, 1); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) { + if (op & 4) { + emith_t_to_carry(sr, 0); + emith_adc_r_r_r(tmp, tmp3, tmp2); + } else { + emith_t_to_carry(sr, 1); + emith_sbc_r_r_r(tmp, tmp3, tmp2); + } + } else +#endif + { + EMITH_HINT_COND(DCOND_CS); + if (op & 4) { // adc + emith_tpop_carry(sr, 0); + emith_adcf_r_r_r(tmp, tmp3, tmp2); + emith_tpush_carry(sr, 0); + } else { + emith_tpop_carry(sr, 1); + emith_sbcf_r_r_r(tmp, tmp3, tmp2); + emith_tpush_carry(sr, 1); + } } goto end_op; case 0x0b: // SUBV Rm,Rn 0011nnnnmmmm1011 case 0x0f: // ADDV Rm,Rn 0011nnnnmmmm1111 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_bic_r_imm(sr, T); - if (op & 4) { - emith_addf_r_r(tmp, tmp2); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); +#if T_OPTIMIZER + if (rcache_regs_discard & 
BITMASK1(SHR_T)) { + if (op & 4) + emith_add_r_r_r(tmp,tmp3,tmp2); + else + emith_sub_r_r_r(tmp,tmp3,tmp2); } else - emith_subf_r_r(tmp, tmp2); - EMITH_SJMP_START(DCOND_VC); - emith_or_r_imm_c(DCOND_VS, sr, T); - EMITH_SJMP_END(DCOND_VC); +#endif + { + emith_clr_t_cond(sr); + EMITH_HINT_COND(DCOND_VS); + if (op & 4) + emith_addf_r_r_r(tmp, tmp3, tmp2); + else + emith_subf_r_r_r(tmp, tmp3, tmp2); + emith_set_t_cond(sr, DCOND_VS); + } goto end_op; case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE); - tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE, NULL); + tmp4 = rcache_get_reg(SHR_MACH, RC_GR_WRITE, NULL); emith_mul_s64(tmp3, tmp4, tmp, tmp2); goto end_op; } @@ -2085,32 +4410,35 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 0: // SHLL Rn 0100nnnn00000000 case 2: // SHAL Rn 0100nnnn00100000 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_tpop_carry(sr, 0); // dummy - emith_lslf(tmp, tmp, 1); - emith_tpush_carry(sr, 0); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) + emith_lsl(tmp, tmp2, 1); + else +#endif + { + emith_invalidate_t(); + emith_lslf(tmp, tmp2, 1); + emith_carry_to_t(sr, 0); + } goto end_op; case 1: // DT Rn 0100nnnn00010000 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); -#if 0 // scheduling needs tuning - if (FETCH_OP(pc) == 0x8bfd) { // BF #-2 - if (gconst_get(GET_Rn(), &tmp)) { - // XXX: limit burned cycles - emit_move_r_imm32(GET_Rn(), 0); - emith_or_r_imm(sr, T); - cycles += tmp * 4 + 1; // +1 syncs with noconst version, not sure why - skip_op = 1; - } + sr = rcache_get_reg(SHR_SR, 
RC_GR_RMW, NULL); +#if LOOP_DETECTION + if (drcf.loop_type == OF_DELAY_LOOP) { + if (drcf.delay_reg == -1) + drcf.delay_reg = GET_Rn(); else - emith_sh2_dtbf_loop(); - goto end_op; + drcf.polling = drcf.loop_type = 0; } #endif - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - emith_bic_r_imm(sr, T); - emith_subf_r_imm(tmp, 1); - emit_or_t_if_eq(sr); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); + emith_clr_t_cond(sr); + EMITH_HINT_COND(DCOND_EQ); + emith_subf_r_r_imm(tmp, tmp2, 1); + emith_set_t_cond(sr, DCOND_EQ); + emith_or_r_imm(sr, SH2_NO_POLLING); goto end_op; } goto default_; @@ -2119,23 +4447,31 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 0: // SHLR Rn 0100nnnn00000001 case 2: // SHAR Rn 0100nnnn00100001 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_tpop_carry(sr, 0); // dummy - if (op & 0x20) { - emith_asrf(tmp, tmp, 1); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) { + if (op & 0x20) + emith_asr(tmp,tmp2,1); + else + emith_lsr(tmp,tmp2,1); } else - emith_lsrf(tmp, tmp, 1); - emith_tpush_carry(sr, 0); +#endif + { + emith_invalidate_t(); + if (op & 0x20) { + emith_asrf(tmp, tmp2, 1); + } else + emith_lsrf(tmp, tmp2, 1); + emith_carry_to_t(sr, 0); + } goto end_op; case 1: // CMP/PZ Rn 0100nnnn00010001 - tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_bic_r_imm(sr, T); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_clr_t_cond(sr); emith_cmp_r_imm(tmp, 0); - EMITH_SJMP_START(DCOND_LT); - emith_or_r_imm_c(DCOND_GE, sr, T); - EMITH_SJMP_END(DCOND_LT); + emith_set_t_cond(sr, DCOND_GE); goto end_op; } goto default_; @@ -2164,14 +4500,13 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: goto default_; } - tmp2 = rcache_get_reg(GET_Rn(), 
RC_GR_RMW); - emith_sub_r_imm(tmp2, 4); - rcache_clean(); - rcache_get_reg_arg(0, GET_Rn()); - tmp3 = rcache_get_reg_arg(1, tmp); - if (tmp == SHR_SR) - emith_clear_msb(tmp3, tmp3, 22); // reserved bits defined by ISA as 0 - emit_memhandler_write(2); + if (tmp == SHR_SR) { + tmp3 = rcache_get_reg_arg(1, tmp, &tmp4); + emith_sync_t(tmp4); + emith_clear_msb(tmp3, tmp4, 22); // reserved bits defined by ISA as 0 + } else + tmp3 = rcache_get_reg_arg(1, tmp, NULL); + emit_memhandler_write_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_PREDECR); goto end_op; case 0x04: case 0x05: @@ -2179,34 +4514,54 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 0x04: // ROTL Rn 0100nnnn00000100 case 0x05: // ROTR Rn 0100nnnn00000101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_tpop_carry(sr, 0); // dummy - if (op & 1) { - emith_rorf(tmp, tmp, 1); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp2); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) { + if (op & 1) + emith_ror(tmp, tmp2, 1); + else + emith_rol(tmp, tmp2, 1); } else - emith_rolf(tmp, tmp, 1); - emith_tpush_carry(sr, 0); +#endif + { + emith_invalidate_t(); + if (op & 1) + emith_rorf(tmp, tmp2, 1); + else + emith_rolf(tmp, tmp2, 1); + emith_carry_to_t(sr, 0); + } goto end_op; case 0x24: // ROTCL Rn 0100nnnn00100100 case 0x25: // ROTCR Rn 0100nnnn00100101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_tpop_carry(sr, 0); - if (op & 1) { - emith_rorcf(tmp); + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) { + emith_t_to_carry(sr, 0); + if (op & 1) + emith_rorc(tmp); + else + emith_rolc(tmp); } else - emith_rolcf(tmp); - emith_tpush_carry(sr, 0); +#endif + { + emith_tpop_carry(sr, 0); + if (op & 1) + emith_rorcf(tmp); + else + 
emith_rolcf(tmp); + emith_tpush_carry(sr, 0); + } goto end_op; case 0x15: // CMP/PL Rn 0100nnnn00010101 - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_bic_r_imm(sr, T); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_clr_t_cond(sr); emith_cmp_r_imm(tmp, 0); - EMITH_SJMP_START(DCOND_LE); - emith_or_r_imm_c(DCOND_GT, sr, T); - EMITH_SJMP_END(DCOND_LE); + emith_set_t_cond(sr, DCOND_GT); goto end_op; } goto default_; @@ -2235,47 +4590,40 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) default: goto default_; } - rcache_get_reg_arg(0, GET_Rn()); - tmp2 = emit_memhandler_read(2); if (tmp == SHR_SR) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + emith_invalidate_t(); + tmp2 = emit_memhandler_read_rr(sh2, SHR_TMP, GET_Rn(), 0, 2 | MF_POSTINCR); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_write_sr(sr, tmp2); + rcache_free_tmp(tmp2); drcf.test_irq = 1; - } else { - tmp = rcache_get_reg(tmp, RC_GR_WRITE); - emith_move_r_r(tmp, tmp2); - } - rcache_free_tmp(tmp2); - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - emith_add_r_imm(tmp, 4); + } else + emit_memhandler_read_rr(sh2, tmp, GET_Rn(), 0, 2 | MF_POSTINCR); goto end_op; case 0x08: case 0x09: switch (GET_Fx()) { - case 0: - // SHLL2 Rn 0100nnnn00001000 - // SHLR2 Rn 0100nnnn00001001 + case 0: // SHLL2 Rn 0100nnnn00001000 + // SHLR2 Rn 0100nnnn00001001 tmp = 2; break; - case 1: - // SHLL8 Rn 0100nnnn00011000 - // SHLR8 Rn 0100nnnn00011001 + case 1: // SHLL8 Rn 0100nnnn00011000 + // SHLR8 Rn 0100nnnn00011001 tmp = 8; break; - case 2: - // SHLL16 Rn 0100nnnn00101000 - // SHLR16 Rn 0100nnnn00101001 + case 2: // SHLL16 Rn 0100nnnn00101000 + // SHLR16 Rn 0100nnnn00101001 tmp = 16; break; default: goto default_; } - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_RMW, &tmp3); if (op & 1) { - emith_lsr(tmp2, tmp2, tmp); + emith_lsr(tmp2, tmp3, tmp); } else - 
emith_lsl(tmp2, tmp2, tmp); + emith_lsl(tmp2, tmp3, tmp); goto end_op; case 0x0a: switch (GET_Fx()) @@ -2299,18 +4647,19 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 1: // TAS.B @Rn 0100nnnn00011011 // XXX: is TAS working on 32X? - rcache_get_reg_arg(0, GET_Rn()); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + rcache_get_reg_arg(0, GET_Rn(), NULL); tmp = emit_memhandler_read(0); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_bic_r_imm(sr, T); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_clr_t_cond(sr); emith_cmp_r_imm(tmp, 0); - emit_or_t_if_eq(sr); - rcache_clean(); + emith_set_t_cond(sr, DCOND_EQ); emith_or_r_imm(tmp, 0x80); tmp2 = rcache_get_tmp_arg(1); // assuming it differs to tmp emith_move_r_r(tmp2, tmp); rcache_free_tmp(tmp); - rcache_get_reg_arg(0, GET_Rn()); + rcache_get_reg_arg(0, GET_Rn(), NULL); emit_memhandler_write(0); break; default: @@ -2318,7 +4667,6 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } goto end_op; case 0x0e: - tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); switch (GET_Fx()) { case 0: // LDC Rm,SR 0100mmmm00001110 @@ -2334,48 +4682,31 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; } if (tmp2 == SHR_SR) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + emith_invalidate_t(); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ, NULL); emith_write_sr(sr, tmp); drcf.test_irq = 1; - } else { - tmp2 = rcache_get_reg(tmp2, RC_GR_WRITE); - emith_move_r_r(tmp2, tmp); - } + } else + emit_move_r_r(tmp2, GET_Rn()); goto end_op; - case 0x0f: - // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111 - emit_indirect_read_double(&tmp, &tmp2, GET_Rn(), GET_Rm(), 1); - emith_sext(tmp, tmp, 16); - emith_sext(tmp2, tmp2, 16); - tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW); - tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW); - emith_mula_s64(tmp3, tmp4, tmp, tmp2); + case 0x0f: // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111 + 
emit_indirect_read_double(sh2, &tmp, &tmp2, GET_Rn(), GET_Rm(), 1); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_RMW, NULL); + tmp4 = rcache_get_reg(SHR_MACH, RC_GR_RMW, NULL); + emith_sh2_macw(tmp3, tmp4, tmp, tmp2, sr); rcache_free_tmp(tmp2); - // XXX: MACH should be untouched when S is set? - sr = rcache_get_reg(SHR_SR, RC_GR_READ); - emith_tst_r_imm(sr, S); - EMITH_JMP_START(DCOND_EQ); - - emith_asr(tmp, tmp3, 31); - emith_eorf_r_r(tmp, tmp4); // tmp = ((signed)macl >> 31) ^ mach - EMITH_JMP_START(DCOND_EQ); - emith_move_r_imm(tmp3, 0x80000000); - emith_tst_r_r(tmp4, tmp4); - EMITH_SJMP_START(DCOND_MI); - emith_sub_r_imm_c(DCOND_PL, tmp3, 1); // positive - EMITH_SJMP_END(DCOND_MI); - EMITH_JMP_END(DCOND_EQ); - - EMITH_JMP_END(DCOND_EQ); rcache_free_tmp(tmp); goto end_op; } goto default_; ///////////////////////////////////////////// - case 0x05: - // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd - emit_memhandler_read_rr(GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2); + case 0x05: // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), (op & 0x0f) * 4, 2 | drcf.polling); goto end_op; ///////////////////////////////////////////// @@ -2388,21 +4719,19 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x04: // MOV.B @Rm+,Rn 0110nnnnmmmm0100 case 0x05: // MOV.W @Rm+,Rn 0110nnnnmmmm0101 case 0x06: // MOV.L @Rm+,Rn 0110nnnnmmmm0110 - emit_memhandler_read_rr(GET_Rn(), GET_Rm(), 0, op & 3); - if ((op & 7) >= 4 && GET_Rn() != GET_Rm()) { - tmp = rcache_get_reg(GET_Rm(), RC_GR_RMW); - emith_add_r_imm(tmp, (1 << (op & 3))); - } + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + tmp = ((op & 7) >= 4 && GET_Rn() != GET_Rm()) ? MF_POSTINCR : drcf.polling; + emit_memhandler_read_rr(sh2, GET_Rn(), GET_Rm(), 0, (op & 3) | tmp); goto end_op; - case 0x03: - case 0x07 ... 
0x0f: - tmp = rcache_get_reg(GET_Rm(), RC_GR_READ); - tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); + case 0x03: // MOV Rm,Rn 0110nnnnmmmm0011 + emit_move_r_r(GET_Rn(), GET_Rm()); + goto end_op; + default: // 0x07 ... 0x0f + tmp = rcache_get_reg(GET_Rm(), RC_GR_READ, NULL); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); switch (op & 0x0f) { - case 0x03: // MOV Rm,Rn 0110nnnnmmmm0011 - emith_move_r_r(tmp2, tmp); - break; case 0x07: // NOT Rm,Rn 0110nnnnmmmm0111 emith_mvn_r_r(tmp2, tmp); break; @@ -2424,25 +4753,39 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_rol(tmp2, tmp, 16); break; case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010 - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_tpop_carry(sr, 1); - emith_negcf_r_r(tmp2, tmp); - emith_tpush_carry(sr, 1); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_sync_t(sr); +#if T_OPTIMIZER + if (rcache_regs_discard & BITMASK1(SHR_T)) { + emith_t_to_carry(sr, 1); + emith_negc_r_r(tmp2, tmp); + } else +#endif + { + EMITH_HINT_COND(DCOND_CS); + emith_tpop_carry(sr, 1); + emith_negcf_r_r(tmp2, tmp); + emith_tpush_carry(sr, 1); + } break; case 0x0b: // NEG Rm,Rn 0110nnnnmmmm1011 emith_neg_r_r(tmp2, tmp); break; case 0x0c: // EXTU.B Rm,Rn 0110nnnnmmmm1100 emith_clear_msb(tmp2, tmp, 24); + rcache_set_x16(tmp2, 1, 1); break; case 0x0d: // EXTU.W Rm,Rn 0110nnnnmmmm1101 emith_clear_msb(tmp2, tmp, 16); + rcache_set_x16(tmp2, 0, 1); break; case 0x0e: // EXTS.B Rm,Rn 0110nnnnmmmm1110 emith_sext(tmp2, tmp, 8); + rcache_set_x16(tmp2, 1, 0); break; case 0x0f: // EXTS.W Rm,Rn 0110nnnnmmmm1111 emith_sext(tmp2, tmp, 16); + rcache_set_x16(tmp2, 1, 0); break; } goto end_op; @@ -2450,13 +4793,11 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) goto default_; ///////////////////////////////////////////// - case 0x07: - // ADD #imm,Rn 0111nnnniiiiiiii - tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); - if (op & 0x80) { // adding negative - emith_sub_r_imm(tmp, -op & 0xff); - } else - 
emith_add_r_imm(tmp, op & 0xff); + case 0x07: // ADD #imm,Rn 0111nnnniiiiiiii + if (op & 0x80) // adding negative + emit_sub_r_imm(GET_Rn(), (u8)-op); + else + emit_add_r_imm(GET_Rn(), (u8)op); goto end_op; ///////////////////////////////////////////// @@ -2465,29 +4806,24 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { case 0x0000: // MOV.B R0,@(disp,Rn) 10000000nnnndddd case 0x0100: // MOV.W R0,@(disp,Rn) 10000001nnnndddd - rcache_clean(); - tmp = rcache_get_reg_arg(0, GET_Rm()); - tmp2 = rcache_get_reg_arg(1, SHR_R0); - tmp3 = (op & 0x100) >> 8; - if (op & 0x0f) - emith_add_r_imm(tmp, (op & 0x0f) << tmp3); - emit_memhandler_write(tmp3); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + tmp = (op & 0x100) >> 8; + emit_memhandler_write_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp); goto end_op; case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); tmp = (op & 0x100) >> 8; - emit_memhandler_read_rr(SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp); + emit_memhandler_read_rr(sh2, SHR_R0, GET_Rm(), (op & 0x0f) << tmp, tmp | drcf.polling); goto end_op; case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii - // XXX: could use cmn - tmp = rcache_get_tmp(); - tmp2 = rcache_get_reg(0, RC_GR_READ); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_move_r_imm_s8(tmp, op & 0xff); - emith_bic_r_imm(sr, T); - emith_cmp_r_r(tmp2, tmp); - emit_or_t_if_eq(sr); - rcache_free_tmp(tmp); + tmp2 = rcache_get_reg(SHR_R0, RC_GR_READ, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_clr_t_cond(sr); + emith_cmp_r_imm(tmp2, (s8)(op & 0xff)); + emith_set_t_cond(sr, DCOND_EQ); goto end_op; } goto default_; @@ -2499,123 +4835,99 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) case 0x0000: // MOV.B R0,@(disp,GBR) 11000000dddddddd case 0x0100: // MOV.W R0,@(disp,GBR) 11000001dddddddd case 0x0200: // MOV.L 
R0,@(disp,GBR) 11000010dddddddd - rcache_clean(); - tmp = rcache_get_reg_arg(0, SHR_GBR); - tmp2 = rcache_get_reg_arg(1, SHR_R0); - tmp3 = (op & 0x300) >> 8; - emith_add_r_imm(tmp, (op & 0xff) << tmp3); - emit_memhandler_write(tmp3); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + tmp = (op & 0x300) >> 8; + emit_memhandler_write_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp); goto end_op; case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd case 0x0600: // MOV.L @(disp,GBR),R0 11000110dddddddd + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); tmp = (op & 0x300) >> 8; - emit_memhandler_read_rr(SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp); - goto end_op; - case 0x0300: // TRAPA #imm 11000011iiiiiiii - tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); - emith_sub_r_imm(tmp, 4*2); - // push SR - tmp = rcache_get_reg_arg(0, SHR_SP); - emith_add_r_imm(tmp, 4); - tmp = rcache_get_reg_arg(1, SHR_SR); - emith_clear_msb(tmp, tmp, 22); - emit_memhandler_write(2); - // push PC - rcache_get_reg_arg(0, SHR_SP); - tmp = rcache_get_tmp_arg(1); - emith_move_r_imm(tmp, pc); - emit_memhandler_write(2); - // obtain new PC - emit_memhandler_read_rr(SHR_PC, SHR_VBR, (op & 0xff) * 4, 2); - // indirect jump -> back to dispatcher - rcache_flush(); - emith_jump(sh2_drc_dispatcher); + emit_memhandler_read_rr(sh2, SHR_R0, SHR_GBR, (op & 0xff) << tmp, tmp | drcf.polling); goto end_op; case 0x0800: // TST #imm,R0 11001000iiiiiiii - tmp = rcache_get_reg(SHR_R0, RC_GR_READ); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_bic_r_imm(sr, T); + tmp = rcache_get_reg(SHR_R0, RC_GR_READ, NULL); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_clr_t_cond(sr); emith_tst_r_imm(tmp, op & 0xff); - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); goto end_op; case 0x0900: // AND #imm,R0 11001001iiiiiiii - tmp = rcache_get_reg(SHR_R0, RC_GR_RMW); - emith_and_r_imm(tmp, op & 0xff); + tmp = 
rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2); + emith_and_r_r_imm(tmp, tmp2, (op & 0xff)); goto end_op; case 0x0a00: // XOR #imm,R0 11001010iiiiiiii - tmp = rcache_get_reg(SHR_R0, RC_GR_RMW); - emith_eor_r_imm(tmp, op & 0xff); + if (op & 0xff) { + tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2); + emith_eor_r_r_imm(tmp, tmp2, (op & 0xff)); + } goto end_op; case 0x0b00: // OR #imm,R0 11001011iiiiiiii - tmp = rcache_get_reg(SHR_R0, RC_GR_RMW); - emith_or_r_imm(tmp, op & 0xff); + if (op & 0xff) { + tmp = rcache_get_reg(SHR_R0, RC_GR_RMW, &tmp2); + emith_or_r_r_imm(tmp, tmp2, (op & 0xff)); + } goto end_op; case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii - tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_bic_r_imm(sr, T); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0 | drcf.polling); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + emith_clr_t_cond(sr); emith_tst_r_imm(tmp, op & 0xff); - emit_or_t_if_eq(sr); + emith_set_t_cond(sr, DCOND_EQ); rcache_free_tmp(tmp); goto end_op; case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii - tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0); - emith_and_r_imm(tmp, op & 0xff); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); + tmp2 = rcache_get_tmp_arg(1); + emith_and_r_r_imm(tmp2, tmp, (op & 0xff)); goto end_rmw_op; case 0x0e00: // XOR.B #imm,@(R0,GBR) 11001110iiiiiiii - tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0); - emith_eor_r_imm(tmp, op & 0xff); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); + tmp2 = rcache_get_tmp_arg(1); + emith_eor_r_r_imm(tmp2, tmp, (op & 0xff)); goto end_rmw_op; case 0x0f00: // OR.B #imm,@(R0,GBR) 11001111iiiiiiii - tmp = emit_indirect_indexed_read(SHR_R0, SHR_GBR, 0); - 
emith_or_r_imm(tmp, op & 0xff); - end_rmw_op: + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); + tmp = emit_indirect_indexed_read(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); tmp2 = rcache_get_tmp_arg(1); - emith_move_r_r(tmp2, tmp); + emith_or_r_r_imm(tmp2, tmp, (op & 0xff)); + end_rmw_op: rcache_free_tmp(tmp); - tmp3 = rcache_get_reg_arg(0, SHR_GBR); - tmp4 = rcache_get_reg(SHR_R0, RC_GR_READ); - emith_add_r_r(tmp3, tmp4); - emit_memhandler_write(0); + emit_indirect_indexed_write(sh2, SHR_TMP, SHR_R0, SHR_GBR, 0); goto end_op; } goto default_; ///////////////////////////////////////////// - case 0x0e: - // MOV #imm,Rn 1110nnnniiiiiiii - emit_move_r_imm32(GET_Rn(), (u32)(signed int)(signed char)op); + case 0x0e: // MOV #imm,Rn 1110nnnniiiiiiii + emit_move_r_imm32(GET_Rn(), (s8)op); goto end_op; default: default_: - elprintf_sh2(sh2, EL_ANOMALY, - "drc: illegal op %04x @ %08x", op, pc - 2); - - tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); - emith_sub_r_imm(tmp, 4*2); - // push SR - tmp = rcache_get_reg_arg(0, SHR_SP); - emith_add_r_imm(tmp, 4); - tmp = rcache_get_reg_arg(1, SHR_SR); - emith_clear_msb(tmp, tmp, 22); - emit_memhandler_write(2); - // push PC - rcache_get_reg_arg(0, SHR_SP); - tmp = rcache_get_tmp_arg(1); - emith_move_r_imm(tmp, pc - 2); - emit_memhandler_write(2); - // obtain new PC - emit_memhandler_read_rr(SHR_PC, SHR_VBR, 4 * 4, 2); - // indirect jump -> back to dispatcher - rcache_flush(); - emith_jump(sh2_drc_dispatcher); - break; + if (!(op_flags[i] & OF_B_IN_DS)) { + elprintf_sh2(sh2, EL_ANOMALY, + "drc: illegal op %04x @ %08x", op, pc - 2); + exit(1); + } } end_op: rcache_unlock_all(); + rcache_set_usage_now(0); +#if DRC_DEBUG & 64 + RCACHE_CHECK("after insn"); +#endif cycles += opd->cycles; @@ -2626,8 +4938,9 @@ end_op: // test irq? 
if (drcf.test_irq && !drcf.pending_branch_direct) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); + emith_sync_t(sr); if (!drcf.pending_branch_indirect) emit_move_r_imm32(SHR_PC, pc); rcache_flush(); @@ -2635,174 +4948,302 @@ end_op: drcf.test_irq = 0; } - // branch handling (with/without delay) + // branch handling if (drcf.pending_branch_direct) { - struct op_data *opd_b = - (op_flags[i] & OF_DELAY_OP) ? &ops[i-1] : opd; + struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; u32 target_pc = opd_b->imm; int cond = -1; + int ctaken = 0; void *target = NULL; - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - FLUSH_CYCLES(sr); + if (OP_ISBRACND(opd_b->op)) + ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; + cycles += ctaken; // assume branch taken - if (opd_b->op != OP_BRANCH) - cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE; - if (cond != -1) { - int ctaken = (op_flags[i] & OF_DELAY_OP) ? 1 : 2; - - if (delay_dep_fw & BITMASK1(SHR_T)) - emith_tst_r_imm(sr, T_save); - else - emith_tst_r_imm(sr, T); - - emith_sub_r_imm_c(cond, sr, ctaken<<12); +#if LOOP_OPTIMIZER + if ((drcf.loop_type == OF_IDLE_LOOP || + (drcf.loop_type == OF_DELAY_LOOP && drcf.delay_reg >= 0))) + { + // idle or delay loop + emit_sync_t_to_sr(); + emith_sh2_delay_loop(cycles, drcf.delay_reg); + rcache_unlock_all(); // may lock delay_reg + drcf.polling = drcf.loop_type = drcf.pinning = 0; } +#endif + +#if CALL_STACK + void *rtsadd = NULL, *rtsret = NULL; + if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { + // BSR - save rts data + tmp = rcache_get_tmp_arg(1); + rtsadd = tcache_ptr; + emith_move_r_imm_s8_patchable(tmp, 0); + rcache_clean_tmp(); + rcache_invalidate_tmp(); + emith_call(sh2_drc_dispatcher_call); + rtsret = tcache_ptr; + } +#endif + + // XXX move below cond test if not changing host cond (MIPS delay slot)? 
+ sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(sr); rcache_clean(); -#if LINK_BRANCHES - if (find_in_array(branch_target_pc, branch_target_count, target_pc) >= 0) + if (OP_ISBRACND(opd_b->op)) { + // BT[S], BF[S] - emit condition test + cond = (opd_b->op == OP_BRANCH_CF) ? DCOND_EQ : DCOND_NE; + if (delay_dep_fw & BITMASK1(SHR_T)) { + emith_sync_t(sr); + emith_tst_r_imm(sr, T_save); + } else { + cond = emith_tst_t(sr, (opd_b->op == OP_BRANCH_CT)); + if (emith_get_t_cond() >= 0) { + if (opd_b->op == OP_BRANCH_CT) + emith_or_r_imm_c(cond, sr, T); + else + emith_bic_r_imm_c(cond, sr, T); + } + } + } else + emith_sync_t(sr); + // no modification of host status/flags between here and branching! + + v = find_in_sorted_linkage(branch_targets, branch_target_count, target_pc); + if (v >= 0) { // local branch - // XXX: jumps back can be linked already - if (branch_patch_count < MAX_LOCAL_BRANCHES) { + if (branch_targets[v].ptr) { + // local backward jump, link here now since host PC is already known + target = branch_targets[v].ptr; +#if LOOP_OPTIMIZER + if (pinned_loops[pinned_loop_count].pc == target_pc) { + // backward jump at end of optimized loop + rcache_unpin_all(); + target = pinned_loops[pinned_loop_count].ptr; + pinned_loop_count ++; + } +#endif + if (cond != -1) { + if (emith_jump_patch_inrange(tcache_ptr, target)) { + emith_jump_cond(cond, target); + } else { + // not reachable directly, must use far branch + EMITH_JMP_START(emith_invert_cond(cond)); + emith_jump(target); + EMITH_JMP_END(emith_invert_cond(cond)); + } + } else { + emith_jump(target); + rcache_invalidate(); + } + } else if (blx_target_count < MAX_LOCAL_BRANCHES) { + // local forward jump target = tcache_ptr; - branch_patch_pc[branch_patch_count] = target_pc; - branch_patch_ptr[branch_patch_count] = target; - branch_patch_count++; - } - else - dbg(1, "warning: too many local branches"); + blx_targets[blx_target_count++] = + (struct linkage) { .pc = target_pc, .ptr = target, .mask = 
0x2 }; + if (cond != -1) + emith_jump_cond_patchable(cond, target); + else { + emith_jump_patchable(target); + rcache_invalidate(); + } + } else + // no space for resolving forward branch, handle it as external + dbg(1, "warning: too many unresolved branches"); } if (target == NULL) -#endif { // can't resolve branch locally, make a block exit - emit_move_r_imm32(SHR_PC, target_pc); - rcache_clean(); + bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); + if (cond != -1) { +#ifndef __arm__ + if (bl && blx_target_count < ARRAY_SIZE(blx_targets)) { + // conditional jumps get a blx stub for the far jump + bl->type = BL_JCCBLX; + target = tcache_ptr; + blx_targets[blx_target_count++] = + (struct linkage) { .pc = target_pc, .ptr = target, .bl = bl }; + emith_jump_cond_patchable(cond, target); + } else { + // not linkable, or blx table full; inline jump @dispatcher + EMITH_JMP_START(emith_invert_cond(cond)); + if (bl) { + bl->jump = tcache_ptr; + emith_flush(); // flush to inhibit insn swapping + bl->type = BL_LDJMP; + } + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, target_pc); + rcache_free_tmp(tmp); + target = sh2_drc_dispatcher; - target = dr_prepare_ext_branch(target_pc, sh2->is_slave, tcache_id); - if (target == NULL) - return NULL; + emith_jump_patchable(target); + EMITH_JMP_END(emith_invert_cond(cond)); + } +#else + // jump @dispatcher - ARM 32bit version with conditional execution + EMITH_SJMP_START(emith_invert_cond(cond)); + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm_c(cond, tmp, target_pc); + rcache_free_tmp(tmp); + target = sh2_drc_dispatcher; + + if (bl) { + bl->jump = tcache_ptr; + bl->type = BL_JMP; + } + emith_jump_cond_patchable(cond, target); + EMITH_SJMP_END(emith_invert_cond(cond)); +#endif + } else { + // unconditional, has the far jump inlined + if (bl) { + emith_flush(); // flush to inhibit insn swapping + bl->type = BL_LDJMP; + } + + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, target_pc); + 
rcache_free_tmp(tmp); + target = sh2_drc_dispatcher; + + emith_jump_patchable(target); + rcache_invalidate(); + } } - if (cond != -1) - emith_jump_cond_patchable(cond, target); - else { - emith_jump_patchable(target); - rcache_invalidate(); - } +#if CALL_STACK + if (rtsadd) + emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret); +#endif + + // branch not taken, correct cycle count (now, cycles < 0) + if (ctaken) + cycles -= ctaken; + // set T bit to reflect branch not taken for OP_BRANCH_CT/CF + if (emith_get_t_cond() >= 0) // T is synced for all other cases + emith_set_t(sr, opd_b->op == OP_BRANCH_CF); drcf.pending_branch_direct = 0; + if (target_pc >= base_pc && target_pc < pc) + drcf.polling = drcf.loop_type = 0; } else if (drcf.pending_branch_indirect) { - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + u32 target_pc; + + tmp = rcache_get_reg_arg(0, SHR_PC, NULL); + +#if CALL_STACK + struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; + void *rtsadd = NULL, *rtsret = NULL; + + if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { + // JSR, BSRF - save rts data + tmp = rcache_get_tmp_arg(1); + rtsadd = tcache_ptr; + emith_move_r_imm_s8_patchable(tmp, 0); + rcache_clean_tmp(); + rcache_invalidate_tmp(); + emith_call(sh2_drc_dispatcher_call); + rtsret = tcache_ptr; + } +#endif + + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); FLUSH_CYCLES(sr); - rcache_flush(); - emith_jump(sh2_drc_dispatcher); + emith_sync_t(sr); + rcache_clean(); + +#if CALL_STACK + if (opd_b->rm == SHR_PR) { + // RTS - restore rts data, else jump to dispatcher + emith_jump(sh2_drc_dispatcher_return); + } else +#endif + if (gconst_get(SHR_PC, &target_pc)) { + // JMP, JSR, BRAF, BSRF const - treat like unconditional direct branch + bl = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); + if (bl) // pc already loaded somewhere else, can patch jump only + bl->type = BL_JMP; + emith_jump_patchable(sh2_drc_dispatcher); + } else { + // JMP, JSR, BRAF, BSRF 
not const + emith_jump(sh2_drc_dispatcher); + } + rcache_invalidate(); + +#if CALL_STACK + if (rtsadd) + emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret); +#endif + drcf.pending_branch_indirect = 0; + drcf.polling = drcf.loop_type = 0; } + rcache_unlock_all(); do_host_disasm(tcache_id); } - tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); - FLUSH_CYCLES(tmp); - rcache_flush(); - // check the last op if (op_flags[i-1] & OF_DELAY_OP) opd = &ops[i-2]; else opd = &ops[i-1]; - if (opd->op != OP_BRANCH && opd->op != OP_BRANCH_R - && opd->op != OP_BRANCH_RF && opd->op != OP_RTE) + if (! OP_ISBRAUC(opd->op) || (opd->dest & BITMASK1(SHR_PR))) { - void *target; + tmp = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); + FLUSH_CYCLES(tmp); + emith_sync_t(tmp); - emit_move_r_imm32(SHR_PC, pc); + rcache_clean(); + bl = dr_prepare_ext_branch(block->entryp, pc, sh2->is_slave, tcache_id); + if (bl) { + emith_flush(); // flush to inhibit insn swapping + bl->type = BL_LDJMP; + } + tmp = rcache_get_tmp_arg(0); + emith_move_r_imm(tmp, pc); + emith_jump_patchable(sh2_drc_dispatcher); + rcache_invalidate(); + } else rcache_flush(); - target = dr_prepare_ext_branch(pc, sh2->is_slave, tcache_id); - if (target == NULL) - return NULL; - emith_jump_patchable(target); - } + // link unresolved branches, emitting blx area entries as needed + emit_branch_linkage_code(sh2, block, tcache_id, branch_targets, + branch_target_count, blx_targets, blx_target_count); - // link local branches - for (i = 0; i < branch_patch_count; i++) { - void *target; - int t; - t = find_in_array(branch_target_pc, branch_target_count, branch_patch_pc[i]); - target = branch_target_ptr[t]; - if (target == NULL) { - // flush pc and go back to dispatcher (this should no longer happen) - dbg(1, "stray branch to %08x %p", branch_patch_pc[i], tcache_ptr); - target = tcache_ptr; - emit_move_r_imm32(SHR_PC, branch_patch_pc[i]); - rcache_flush(); - emith_jump(sh2_drc_dispatcher); - } - emith_jump_patch(branch_patch_ptr[i], target); - 
} + emith_flush(); + do_host_disasm(tcache_id); - // mark memory blocks as containing compiled code - // override any overlay blocks as they become unreachable anyway - if ((block->addr & 0xc7fc0000) == 0x06000000 - || (block->addr & 0xfffff000) == 0xc0000000) - { - u16 *drc_ram_blk = NULL; - u32 addr, mask = 0, shift = 0; + emith_pool_commit(0); - if (tcache_id != 0) { - // data array, BIOS - drc_ram_blk = Pico32xMem->drcblk_da[sh2->is_slave]; - shift = SH2_DRCBLK_DA_SHIFT; - mask = 0xfff; - } - else { - // SDRAM - drc_ram_blk = Pico32xMem->drcblk_ram; - shift = SH2_DRCBLK_RAM_SHIFT; - mask = 0x3ffff; - } + // fill blx backup; do this last to backup final patched code + for (i = 0; i < block->entry_count; i++) + for (bl = block->entryp[i].o_links; bl; bl = bl->o_next) + memcpy(bl->jdisp, bl->blx ? bl->blx : bl->jump, emith_jump_at_size()); - // mark recompiled insns - drc_ram_blk[(base_pc & mask) >> shift] = 1; - for (pc = base_pc; pc < end_pc; pc += 2) - drc_ram_blk[(pc & mask) >> shift] = 1; + ring_alloc(&tcache_ring[tcache_id], tcache_ptr - block_entry_ptr); + host_instructions_updated(block_entry_ptr, tcache_ptr, 1); - // mark literals - for (i = 0; i < literal_addr_count; i++) { - tmp = literal_addr[i]; - drc_ram_blk[(tmp & mask) >> shift] = 1; - } - - // add to invalidation lookup lists - addr = base_pc & ~(INVAL_PAGE_SIZE - 1); - for (; addr < end_literals; addr += INVAL_PAGE_SIZE) { - i = (addr & mask) / INVAL_PAGE_SIZE; - add_to_block_list(&inval_lookup[tcache_id][i], block); - } - } - - tcache_ptrs[tcache_id] = tcache_ptr; - - host_instructions_updated(block_entry_ptr, tcache_ptr); + dr_activate_block(block, tcache_id, sh2->is_slave); + emith_update_cache(); do_host_disasm(tcache_id); - if (drcf.literals_disabled && literal_addr_count) - dbg(1, "literals_disabled && literal_addr_count?"); - dbg(2, " block #%d,%d tcache %d/%d, insns %d -> %d %.3f", - tcache_id, blkid_main, - tcache_ptr - tcache_bases[tcache_id], tcache_sizes[tcache_id], + dbg(2, " block 
#%d,%d -> %p tcache %d/%d, insns %d -> %d %.3f", + tcache_id, blkid_main, tcache_ptr, + tcache_ring[tcache_id].used, tcache_ring[tcache_id].size, insns_compiled, host_insn_count, (float)host_insn_count / insns_compiled); - if ((sh2->pc & 0xc6000000) == 0x02000000) // ROM - dbg(2, " hash collisions %d/%d", hash_collisions, block_counts[tcache_id]); + if ((sh2->pc & 0xc6000000) == 0x02000000) { // ROM + dbg(2, " hash collisions %d/%d", hash_collisions, block_ring[tcache_id].used); + Pico32x.emu_flags |= P32XF_DRC_ROM_C; + } /* printf("~~~\n"); tcache_dsm_ptrs[tcache_id] = block_entry_ptr; @@ -2810,63 +5251,248 @@ end_op: printf("~~~\n"); */ -#if (DRC_DEBUG & 4) - fflush(stdout); -#endif - return block_entry_ptr; } static void sh2_generate_utils(void) { - int arg0, arg1, arg2, sr, tmp; - - sh2_drc_write32 = p32x_sh2_write32; - sh2_drc_read8 = p32x_sh2_read8; - sh2_drc_read16 = p32x_sh2_read16; - sh2_drc_read32 = p32x_sh2_read32; + int arg0, arg1, arg2, arg3, sr, tmp, tmp2; +#if DRC_DEBUG + int hic = host_insn_count; // don't count utils for insn statistics +#endif host_arg2reg(arg0, 0); host_arg2reg(arg1, 1); host_arg2reg(arg2, 2); - emith_move_r_r(arg0, arg0); // nop + host_arg2reg(arg3, 3); - // sh2_drc_exit(void) + // sh2_drc_write8(u32 a, u32 d) + sh2_drc_write8 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg2, offsetof(SH2, write8_tab)); + emith_sh2_wcall(arg0, arg1, arg2, arg3); + emith_flush(); + + // sh2_drc_write16(u32 a, u32 d) + sh2_drc_write16 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg2, offsetof(SH2, write16_tab)); + emith_sh2_wcall(arg0, arg1, arg2, arg3); + emith_flush(); + + // sh2_drc_write32(u32 a, u32 d) + sh2_drc_write32 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg2, offsetof(SH2, write32_tab)); + emith_sh2_wcall(arg0, arg1, arg2, arg3); + emith_flush(); + + // d = sh2_drc_read8(u32 a) + sh2_drc_read8 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map)); + EMITH_HINT_COND(DCOND_CS); + emith_sh2_rcall(arg0, arg1, arg2, 
arg3); + EMITH_SJMP_START(DCOND_CS); + emith_and_r_r_c(DCOND_CC, arg0, arg3); + emit_le_ptr8(DCOND_CC, arg0); + emith_read8s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0); + emith_ret_c(DCOND_CC); + EMITH_SJMP_END(DCOND_CS); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_abijump_reg(arg2); + emith_flush(); + + // d = sh2_drc_read16(u32 a) + sh2_drc_read16 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read16_map)); + EMITH_HINT_COND(DCOND_CS); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CS); + emith_and_r_r_c(DCOND_CC, arg0, arg3); + emith_read16s_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0); + emith_ret_c(DCOND_CC); + EMITH_SJMP_END(DCOND_CS); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_abijump_reg(arg2); + emith_flush(); + + // d = sh2_drc_read32(u32 a) + sh2_drc_read32 = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map)); + EMITH_HINT_COND(DCOND_CS); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CS); + emith_and_r_r_c(DCOND_CC, arg0, arg3); + emith_read_r_r_r_c(DCOND_CC, RET_REG, arg2, arg0); + emit_le_swap(DCOND_CC, RET_REG); + emith_ret_c(DCOND_CC); + EMITH_SJMP_END(DCOND_CS); + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_abijump_reg(arg2); + emith_flush(); + + // d = sh2_drc_read8_poll(u32 a) + sh2_drc_read8_poll = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read8_map)); + EMITH_HINT_COND(DCOND_CS); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CC); + emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); + emith_abijump_reg_c(DCOND_CS, arg2); + EMITH_SJMP_END(DCOND_CC); + emith_and_r_r_r(arg1, arg0, arg3); + emit_le_ptr8(-1, arg1); + emith_read8s_r_r_r(arg1, arg2, arg1); + emith_push_ret(arg1); + emith_move_r_r_ptr(arg2, CONTEXT_REG); + emith_abicall(p32x_sh2_poll_memory8); + emith_pop_and_ret(arg1); + emith_flush(); + + // d = sh2_drc_read16_poll(u32 a) + sh2_drc_read16_poll = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, 
read16_map)); + EMITH_HINT_COND(DCOND_CS); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CC); + emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); + emith_abijump_reg_c(DCOND_CS, arg2); + EMITH_SJMP_END(DCOND_CC); + emith_and_r_r_r(arg1, arg0, arg3); + emith_read16s_r_r_r(arg1, arg2, arg1); + emith_push_ret(arg1); + emith_move_r_r_ptr(arg2, CONTEXT_REG); + emith_abicall(p32x_sh2_poll_memory16); + emith_pop_and_ret(arg1); + emith_flush(); + + // d = sh2_drc_read32_poll(u32 a) + sh2_drc_read32_poll = (void *)tcache_ptr; + emith_ctx_read_ptr(arg1, offsetof(SH2, read32_map)); + EMITH_HINT_COND(DCOND_CS); + emith_sh2_rcall(arg0, arg1, arg2, arg3); + EMITH_SJMP_START(DCOND_CC); + emith_move_r_r_ptr_c(DCOND_CS, arg1, CONTEXT_REG); + emith_abijump_reg_c(DCOND_CS, arg2); + EMITH_SJMP_END(DCOND_CC); + emith_and_r_r_r(arg1, arg0, arg3); + emith_read_r_r_r(arg1, arg2, arg1); + emit_le_swap(-1, arg1); + emith_push_ret(arg1); + emith_move_r_r_ptr(arg2, CONTEXT_REG); + emith_abicall(p32x_sh2_poll_memory32); + emith_pop_and_ret(arg1); + emith_flush(); + + // sh2_drc_exit(u32 pc) sh2_drc_exit = (void *)tcache_ptr; + emith_ctx_write(arg0, SHR_PC * 4); emit_do_static_regs(1, arg2); emith_sh2_drc_exit(); + emith_flush(); - // sh2_drc_dispatcher(void) + // sh2_drc_dispatcher(u32 pc) sh2_drc_dispatcher = (void *)tcache_ptr; - sr = rcache_get_reg(SHR_SR, RC_GR_READ); - emith_cmp_r_imm(sr, 0); - emith_jump_cond(DCOND_LT, sh2_drc_exit); - rcache_invalidate(); - emith_ctx_read(arg0, SHR_PC * 4); - emith_ctx_read(arg1, offsetof(SH2, is_slave)); - emith_add_r_r_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp)); - emith_call(dr_lookup_block); - emit_block_entry(); + emith_ctx_write(arg0, SHR_PC * 4); +#if BRANCH_CACHE + // check if PC is in branch target cache + emith_and_r_r_imm(arg1, arg0, (ARRAY_SIZE(sh2s->branch_cache)-1)*8); + emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 
1 : 0); + emith_read_r_r_offs(arg2, arg1, offsetof(SH2, branch_cache)); + emith_cmp_r_r(arg2, arg0); + EMITH_SJMP_START(DCOND_NE); +#if (DRC_DEBUG & 128) + emith_move_r_ptr_imm(arg2, (uptr)&bchit); + emith_read_r_r_offs_c(DCOND_EQ, arg3, arg2, 0); + emith_add_r_imm_c(DCOND_EQ, arg3, 1); + emith_write_r_r_offs_c(DCOND_EQ, arg3, arg2, 0); +#endif + emith_read_r_r_offs_ptr_c(DCOND_EQ, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *)); + emith_jump_reg_c(DCOND_EQ, RET_REG); + EMITH_SJMP_END(DCOND_NE); +#endif + emith_move_r_r_ptr(arg1, CONTEXT_REG); + emith_add_r_r_ptr_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp)); + emith_abicall(dr_lookup_block); + // store PC and block entry ptr (in arg0) in branch target cache + emith_tst_r_r_ptr(RET_REG, RET_REG); + EMITH_SJMP_START(DCOND_EQ); +#if BRANCH_CACHE +#if (DRC_DEBUG & 128) + emith_move_r_ptr_imm(arg2, (uptr)&bcmiss); + emith_read_r_r_offs_c(DCOND_NE, arg3, arg2, 0); + emith_add_r_imm_c(DCOND_NE, arg3, 1); + emith_write_r_r_offs_c(DCOND_NE, arg3, arg2, 0); +#endif + emith_ctx_read_c(DCOND_NE, arg2, SHR_PC * 4); + emith_and_r_r_imm(arg1, arg2, (ARRAY_SIZE(sh2s->branch_cache)-1)*8); + emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg1, sizeof(void *) == 8 ? 
1 : 0); + emith_write_r_r_offs_c(DCOND_NE, arg2, arg1, offsetof(SH2, branch_cache)); + emith_write_r_r_offs_ptr_c(DCOND_NE, RET_REG, arg1, offsetof(SH2, branch_cache) + sizeof(void *)); +#endif + emith_jump_reg_c(DCOND_NE, RET_REG); + EMITH_SJMP_END(DCOND_EQ); // lookup failed, call sh2_translate() - emith_move_r_r(arg0, CONTEXT_REG); + emith_move_r_r_ptr(arg0, CONTEXT_REG); emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id - emith_call(sh2_translate); - emit_block_entry(); - // sh2_translate() failed, flush cache and retry - emith_ctx_read(arg0, offsetof(SH2, drc_tmp)); - emith_call(flush_tcache); - emith_move_r_r(arg0, CONTEXT_REG); - emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); - emith_call(sh2_translate); - emit_block_entry(); + emith_abicall(sh2_translate); + emith_tst_r_r_ptr(RET_REG, RET_REG); + EMITH_SJMP_START(DCOND_EQ); + emith_jump_reg_c(DCOND_NE, RET_REG); + EMITH_SJMP_END(DCOND_EQ); // XXX: can't translate, fail - emith_call(dr_failure); + emith_abicall(dr_failure); + emith_flush(); + +#if CALL_STACK + // pc = sh2_drc_dispatcher_call(u32 pc) + sh2_drc_dispatcher_call = (void *)tcache_ptr; + emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); + emith_add_r_imm(arg2, (u32)(2*sizeof(void *))); + emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); + emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); + emith_add_r_r_r_lsl_ptr(arg3, CONTEXT_REG, arg2, 0); + rcache_get_reg_arg(2, SHR_PR, NULL); + emith_add_r_ret(arg1); + emith_write_r_r_offs_ptr(arg1, arg3, offsetof(SH2, rts_cache)+sizeof(void *)); + emith_write_r_r_offs(arg2, arg3, offsetof(SH2, rts_cache)); + rcache_flush(); + emith_ret(); + emith_flush(); + + // sh2_drc_dispatcher_return(u32 pc) + sh2_drc_dispatcher_return = (void *)tcache_ptr; + emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); + emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0); + emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache)); + emith_cmp_r_r(arg0, arg3); +#if (DRC_DEBUG & 128) + 
EMITH_SJMP_START(DCOND_EQ); + emith_move_r_ptr_imm(arg3, (uptr)&rcmiss); + emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0); + emith_add_r_imm_c(DCOND_NE, arg1, 1); + emith_write_r_r_offs_c(DCOND_NE, arg1, arg3, 0); + emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); + EMITH_SJMP_END(DCOND_EQ); +#else + emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); +#endif + emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *)); + emith_sub_r_imm(arg2, (u32)(2*sizeof(void *))); + emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); + emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); +#if (DRC_DEBUG & 128) + emith_move_r_ptr_imm(arg3, (uptr)&rchit); + emith_read_r_r_offs(arg1, arg3, 0); + emith_add_r_imm(arg1, 1); + emith_write_r_r_offs(arg1, arg3, 0); +#endif + emith_jump_reg(arg0); + emith_flush(); +#endif // sh2_drc_test_irq(void) // assumes it's called from main function (may jump to dispatcher) sh2_drc_test_irq = (void *)tcache_ptr; emith_ctx_read(arg1, offsetof(SH2, pending_level)); - sr = rcache_get_reg(SHR_SR, RC_GR_READ); + sr = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); emith_lsr(arg0, sr, I_SHIFT); emith_and_r_imm(arg0, 0x0f); emith_cmp_r_r(arg1, arg0); // pending_level > ((sr >> 4) & 0x0f)? @@ -2874,61 +5500,70 @@ static void sh2_generate_utils(void) emith_ret_c(DCOND_LE); // nope, return EMITH_SJMP_END(DCOND_GT); // adjust SP - tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); + tmp = rcache_get_reg(SHR_SP, RC_GR_RMW, NULL); emith_sub_r_imm(tmp, 4*2); rcache_clean(); // push SR - tmp = rcache_get_reg_arg(0, SHR_SP); - emith_add_r_imm(tmp, 4); - tmp = rcache_get_reg_arg(1, SHR_SR); + tmp = rcache_get_reg_arg(0, SHR_SP, &tmp2); + emith_add_r_r_imm(tmp, tmp2, 4); + tmp = rcache_get_reg_arg(1, SHR_SR, NULL); emith_clear_msb(tmp, tmp, 22); - emith_move_r_r(arg2, CONTEXT_REG); - emith_call(p32x_sh2_write32); // XXX: use sh2_drc_write32? 
- rcache_invalidate(); + emith_move_r_r_ptr(arg2, CONTEXT_REG); + rcache_invalidate_tmp(); + emith_abicall(p32x_sh2_write32); // XXX: use sh2_drc_write32? // push PC - rcache_get_reg_arg(0, SHR_SP); - emith_ctx_read(arg1, SHR_PC * 4); - emith_move_r_r(arg2, CONTEXT_REG); - emith_call(p32x_sh2_write32); - rcache_invalidate(); + rcache_get_reg_arg(0, SHR_SP, NULL); + rcache_get_reg_arg(1, SHR_PC, NULL); + emith_move_r_r_ptr(arg2, CONTEXT_REG); + rcache_invalidate_tmp(); + emith_abicall(p32x_sh2_write32); // update I, cycles, do callback emith_ctx_read(arg1, offsetof(SH2, pending_level)); - sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW, NULL); emith_bic_r_imm(sr, I); emith_or_r_r_lsl(sr, arg1, I_SHIFT); emith_sub_r_imm(sr, 13 << 12); // at least 13 cycles rcache_flush(); - emith_move_r_r(arg0, CONTEXT_REG); - emith_call_ctx(offsetof(SH2, irq_callback)); // vector = sh2->irq_callback(sh2, level); + emith_move_r_r_ptr(arg0, CONTEXT_REG); + emith_abicall_ctx(offsetof(SH2, irq_callback)); // vector = sh2->irq_callback(sh2, level); // obtain new PC - emith_lsl(arg0, arg0, 2); - emith_ctx_read(arg1, SHR_VBR * 4); - emith_add_r_r(arg0, arg1); - emit_memhandler_read(2); - emith_ctx_write(arg0, SHR_PC * 4); -#ifdef __i386__ - emith_add_r_imm(xSP, 4); // fix stack -#endif - emith_jump(sh2_drc_dispatcher); + tmp = rcache_get_reg_arg(1, SHR_VBR, &tmp2); + emith_add_r_r_r_lsl(arg0, tmp2, RET_REG, 2); + emith_call(sh2_drc_read32); + if (arg0 != RET_REG) + emith_move_r_r(arg0, RET_REG); + emith_call_cleanup(); rcache_invalidate(); + emith_jump(sh2_drc_dispatcher); + emith_flush(); // sh2_drc_entry(SH2 *sh2) sh2_drc_entry = (void *)tcache_ptr; emith_sh2_drc_entry(); - emith_move_r_r(CONTEXT_REG, arg0); // move ctx, arg0 + emith_move_r_r_ptr(CONTEXT_REG, arg0); // move ctx, arg0 emit_do_static_regs(0, arg2); emith_call(sh2_drc_test_irq); + emith_ctx_read(arg0, SHR_PC * 4); emith_jump(sh2_drc_dispatcher); + emith_flush(); - // sh2_drc_write8(u32 a, u32 
d) - sh2_drc_write8 = (void *)tcache_ptr; - emith_ctx_read(arg2, offsetof(SH2, write8_tab)); - emith_sh2_wcall(arg0, arg2); +#ifdef DRC_SR_REG + // sh2_drc_save_sr(SH2 *sh2) + sh2_drc_save_sr = (void *)tcache_ptr; + tmp = rcache_get_reg(SHR_SR, RC_GR_READ, NULL); + emith_write_r_r_offs(tmp, arg0, SHR_SR * 4); + rcache_invalidate(); + emith_ret(); + emith_flush(); - // sh2_drc_write16(u32 a, u32 d) - sh2_drc_write16 = (void *)tcache_ptr; - emith_ctx_read(arg2, offsetof(SH2, write16_tab)); - emith_sh2_wcall(arg0, arg2); + // sh2_drc_restore_sr(SH2 *sh2) + sh2_drc_restore_sr = (void *)tcache_ptr; + tmp = rcache_get_reg(SHR_SR, RC_GR_WRITE, NULL); + emith_read_r_r_offs(tmp, arg0, SHR_SR * 4); + rcache_flush(); + emith_ret(); + emith_flush(); +#endif #ifdef PDB_NET // debug @@ -2943,6 +5578,7 @@ static void sh2_generate_utils(void) emith_adc_r_imm(arg2, 0x01000000); \ emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \ emith_pop_and_ret(); \ + emith_flush(); \ func = tmp; \ } #define MAKE_WRITE_WRAPPER(func) { \ @@ -2953,8 +5589,9 @@ static void sh2_generate_utils(void) emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[1])); \ emith_adc_r_imm(arg2, 0x01000000); \ emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \ - emith_move_r_r(arg2, CONTEXT_REG); \ + emith_move_r_r_ptr(arg2, CONTEXT_REG); \ emith_jump(func); \ + emith_flush(); \ func = tmp; \ } @@ -2964,151 +5601,115 @@ static void sh2_generate_utils(void) MAKE_WRITE_WRAPPER(sh2_drc_write8); MAKE_WRITE_WRAPPER(sh2_drc_write16); MAKE_WRITE_WRAPPER(sh2_drc_write32); -#if (DRC_DEBUG & 4) - host_dasm_new_symbol(sh2_drc_read8); - host_dasm_new_symbol(sh2_drc_read16); - host_dasm_new_symbol(sh2_drc_read32); - host_dasm_new_symbol(sh2_drc_write32); -#endif + MAKE_READ_WRAPPER(sh2_drc_read8_poll); + MAKE_READ_WRAPPER(sh2_drc_read16_poll); + MAKE_READ_WRAPPER(sh2_drc_read32_poll); #endif + emith_pool_commit(0); rcache_invalidate(); #if (DRC_DEBUG & 4) host_dasm_new_symbol(sh2_drc_entry); 
host_dasm_new_symbol(sh2_drc_dispatcher); +#if CALL_STACK + host_dasm_new_symbol(sh2_drc_dispatcher_call); + host_dasm_new_symbol(sh2_drc_dispatcher_return); +#endif host_dasm_new_symbol(sh2_drc_exit); host_dasm_new_symbol(sh2_drc_test_irq); host_dasm_new_symbol(sh2_drc_write8); host_dasm_new_symbol(sh2_drc_write16); + host_dasm_new_symbol(sh2_drc_write32); + host_dasm_new_symbol(sh2_drc_read8); + host_dasm_new_symbol(sh2_drc_read16); + host_dasm_new_symbol(sh2_drc_read32); + host_dasm_new_symbol(sh2_drc_read8_poll); + host_dasm_new_symbol(sh2_drc_read16_poll); + host_dasm_new_symbol(sh2_drc_read32_poll); +#ifdef DRC_SR_REG + host_dasm_new_symbol(sh2_drc_save_sr); + host_dasm_new_symbol(sh2_drc_restore_sr); +#endif +#endif + +#if DRC_DEBUG + host_insn_count = hic; #endif } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 ram_mask) +static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free) { - struct block_link *bl, *bl_next, *bl_unresolved; - u32 i, addr, end_addr; - void *tmp; - - dbg(2, " killing entry %08x-%08x-%08x, blkid %d,%d", - bd->addr, bd->addr + bd->size_nolit, bd->addr + bd->size, - tcache_id, bd - block_tables[tcache_id]); - if (bd->addr == 0 || bd->entry_count == 0) { - dbg(1, " killing dead block!? %08x", bd->addr); - return; - } - - // remove from inval_lookup - addr = bd->addr & ~(INVAL_PAGE_SIZE - 1); - end_addr = bd->addr + bd->size; - for (; addr < end_addr; addr += INVAL_PAGE_SIZE) { - i = (addr & ram_mask) / INVAL_PAGE_SIZE; - rm_from_block_list(&inval_lookup[tcache_id][i], bd); - } - - tmp = tcache_ptr; - bl_unresolved = unresolved_links[tcache_id]; - - // remove from hash table, make incoming links unresolved - // XXX: maybe patch branches w/flush instead? 
- for (i = 0; i < bd->entry_count; i++) { - rm_from_hashlist(&bd->entryp[i], tcache_id); - - // since we never reuse tcache space of dead blocks, - // insert jump to dispatcher for blocks that are linked to this - tcache_ptr = bd->entryp[i].tcache_ptr; - emit_move_r_imm32(SHR_PC, bd->entryp[i].pc); - rcache_flush(); - emith_jump(sh2_drc_dispatcher); - - host_instructions_updated(bd->entryp[i].tcache_ptr, tcache_ptr); - - for (bl = bd->entryp[i].links; bl != NULL; ) { - bl_next = bl->next; - bl->next = bl_unresolved; - bl_unresolved = bl; - bl = bl_next; - } - } - - tcache_ptr = tmp; - unresolved_links[tcache_id] = bl_unresolved; - - bd->addr = bd->size = bd->size_nolit = 0; - bd->entry_count = 0; -} - -static void sh2_smc_rm_block(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, u32 mask) -{ - struct block_list **blist = NULL, *entry; - u32 from = ~0, to = 0, end_addr, taddr, i; + struct block_list **blist, *entry, *next; + u32 mask = RAM_SIZE(tcache_id) - 1; + u32 wtmask = ~0x20000000; // writethrough area mask + u32 start_addr, end_addr; + u32 start_lit, end_lit; struct block_desc *block; + int removed = 0, rest; - blist = &inval_lookup[tcache_id][(a & mask) / INVAL_PAGE_SIZE]; - entry = *blist; - while (entry != NULL) { - block = entry->block; - end_addr = block->addr + block->size; - if (block->addr <= a && a < end_addr) { - // get addr range that includes all removed blocks - if (from > block->addr) - from = block->addr; - if (to < end_addr) - to = end_addr; + // ignore cache-through + a &= wtmask; - sh2_smc_rm_block_entry(block, tcache_id, mask); - if (a >= block->addr + block->size_nolit) - literal_disabled_frames = 3; - - // entry lost, restart search - entry = *blist; - continue; - } - entry = entry->next; - } - - if (from >= to) - return; - - // update range around a to match latest state - from &= ~(INVAL_PAGE_SIZE - 1); - to |= (INVAL_PAGE_SIZE - 1); - for (taddr = from; taddr < to; taddr += INVAL_PAGE_SIZE) { - i = (taddr & mask) / INVAL_PAGE_SIZE; - 
entry = inval_lookup[tcache_id][i]; - - for (; entry != NULL; entry = entry->next) { + do { + blist = &inval_lookup[tcache_id][(a & mask) / INVAL_PAGE_SIZE]; + entry = *blist; + // go through the block list for this range + while (entry != NULL) { + next = entry->next; block = entry->block; - - if (block->addr > a) { - if (to > block->addr) - to = block->addr; - } - else { - end_addr = block->addr + block->size; - if (from < end_addr) - from = end_addr; + start_addr = block->addr & wtmask; + end_addr = start_addr + block->size; + start_lit = block->addr_lit & wtmask; + end_lit = start_lit + block->size_lit; + // disable/delete block if it covers the modified address + if ((start_addr < a+len && a < end_addr) || + (start_lit < a+len && a < end_lit)) + { + dbg(2, "smc remove @%08x", a); + end_addr = (start_lit < a+len && block->size_lit ? a : 0); + dr_rm_block_entry(block, tcache_id, end_addr, free); + removed = 1; } + entry = next; } + rest = INVAL_PAGE_SIZE - (a & (INVAL_PAGE_SIZE-1)); + a += rest, len -= rest; + } while (len > 0); + + if (!removed) { + if (len <= 4) + dbg(2, "rm_blocks called @%08x, no work?", _a); + return; } - // clear code marks - if (from < to) { - u16 *p = drc_ram_blk + ((from & mask) >> shift); - memset(p, 0, (to - from) >> (shift - 1)); +#if BRANCH_CACHE + if (tcache_id) + memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + else { + memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); } +#endif +#if CALL_STACK + if (tcache_id) { + memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + sh2s[tcache_id-1].rts_cache_idx = 0; + } else { + memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } +#endif } -void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid) +void sh2_drc_wcheck_ram(u32 
a, unsigned len, SH2 *sh2) { - dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); - sh2_smc_rm_block(a, Pico32xMem->drcblk_ram, 0, SH2_DRCBLK_RAM_SHIFT, 0x3ffff); + sh2_smc_rm_blocks(a, len, 0, 0); } -void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid) +void sh2_drc_wcheck_da(u32 a, unsigned len, SH2 *sh2) { - dbg(2, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); - sh2_smc_rm_block(a, Pico32xMem->drcblk_da[cpuid], - 1 + cpuid, SH2_DRCBLK_DA_SHIFT, 0xfff); + sh2_smc_rm_blocks(a, len, 1 + sh2->is_slave, 0); } int sh2_execute_drc(SH2 *sh2c, int cycles) @@ -3119,77 +5720,204 @@ int sh2_execute_drc(SH2 *sh2c, int cycles) // bit11 contains T saved for delay slot // others are usual SH2 flags sh2c->sr &= 0x3f3; - sh2c->sr |= cycles << 12; - sh2_drc_entry(sh2c); + sh2c->sr |= (cycles-1) << 12; +#if (DRC_DEBUG & 8) + lastpc = lastcnt = 0; +#endif + + sh2c->state |= SH2_IN_DRC; + host_call(sh2_drc_entry, (SH2 *))(sh2c); + sh2c->state &= ~SH2_IN_DRC; // TODO: irq cycles - ret_cycles = (signed int)sh2c->sr >> 12; - if (ret_cycles > 0) - dbg(1, "warning: drc returned with cycles: %d", ret_cycles); + ret_cycles = (int32_t)sh2c->sr >> 12; + if (ret_cycles >= 0) + dbg(1, "warning: drc returned with cycles: %d, pc %08x", ret_cycles, sh2c->pc); +#if (DRC_DEBUG & 8) + if (lastcnt) + dbg(8, "= %csh2 enter %08x %p (%d times), c=%d", sh2c->is_slave?'s':'m', + lastpc, lastblock, lastcnt, (signed int)sh2c->sr >> 12); +#endif sh2c->sr &= 0x3f3; - return ret_cycles; + return ret_cycles+1; } -#if (DRC_DEBUG & 2) -void block_stats(void) +static void block_stats(void) { - int c, b, i, total = 0; +#if (DRC_DEBUG & 2) + int c, b, i; + long total = 0; printf("block stats:\n"); - for (b = 0; b < ARRAY_SIZE(block_tables); b++) - for (i = 0; i < block_counts[b]; i++) + for (b = 0; b < ARRAY_SIZE(block_tables); b++) { + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) if (block_tables[b][i].addr != 0) total += block_tables[b][i].refcount; + } + 
printf("total: %ld\n",total); - for (c = 0; c < 10; c++) { + for (c = 0; c < 20; c++) { struct block_desc *blk, *maxb = NULL; int max = 0; for (b = 0; b < ARRAY_SIZE(block_tables); b++) { - for (i = 0; i < block_counts[b]; i++) { - blk = &block_tables[b][i]; - if (blk->addr != 0 && blk->refcount > max) { + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) + if ((blk = &block_tables[b][i])->addr != 0 && blk->refcount > max) { max = blk->refcount; maxb = blk; } - } } if (maxb == NULL) break; - printf("%08x %9d %2.3f%%\n", maxb->addr, maxb->refcount, + printf("%08lx %p %9d %2.3f%%\n", (ulong)maxb->addr, maxb->tcache_ptr, maxb->refcount, (double)maxb->refcount / total * 100.0); maxb->refcount = 0; } - for (b = 0; b < ARRAY_SIZE(block_tables); b++) - for (i = 0; i < block_counts[b]; i++) + for (b = 0; b < ARRAY_SIZE(block_tables); b++) + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) block_tables[b][i].refcount = 0; -} -#else -#define block_stats() #endif +} + +void entry_stats(void) +{ +#if (DRC_DEBUG & 32) + int c, b, i, j; + long total = 0; + + printf("block entry stats:\n"); + for (b = 0; b < ARRAY_SIZE(block_tables); b++) { + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) + for (j = 0; j < block_tables[b][i].entry_count; j++) + total += block_tables[b][i].entryp[j].entry_count; + } + printf("total: %ld\n",total); + + for (c = 0; c < 20; c++) { + struct block_desc *blk; + struct block_entry *maxb = NULL; + int max = 0; + for (b = 0; b < ARRAY_SIZE(block_tables); b++) { + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) { + blk = &block_tables[b][i]; + for (j = 0; j < blk->entry_count; j++) + if (blk->entryp[j].entry_count > max) { + max = blk->entryp[j].entry_count; + maxb = &blk->entryp[j]; + } + } + } + if (maxb == NULL) + break; + printf("%08lx %p %9d %2.3f%%\n", (ulong)maxb->pc, maxb->tcache_ptr, maxb->entry_count, + 
(double)100 * maxb->entry_count / total); + maxb->entry_count = 0; + } + + for (b = 0; b < ARRAY_SIZE(block_tables); b++) { + for (i = block_ring[b].first; i != block_ring[b].next; i = (i+1)%block_ring[b].size) + for (j = 0; j < block_tables[b][i].entry_count; j++) + block_tables[b][i].entryp[j].entry_count = 0; + } +#endif +} + +static void backtrace(void) +{ +#if (DRC_DEBUG & 1024) + int i; + printf("backtrace master:\n"); + for (i = 0; i < ARRAY_SIZE(csh2[0]); i++) + SH2_DUMP(&csh2[0][i], "bt msh2"); + printf("backtrace slave:\n"); + for (i = 0; i < ARRAY_SIZE(csh2[1]); i++) + SH2_DUMP(&csh2[1][i], "bt ssh2"); +#endif +} + +static void state_dump(void) +{ +#if (DRC_DEBUG & 2048) + int i; + + SH2_DUMP(&sh2s[0], "master"); + printf("VBR msh2: %lx\n", (ulong)sh2s[0].vbr); + for (i = 0; i < 0x60; i++) { + printf("%08lx ",(ulong)p32x_sh2_read32(sh2s[0].vbr + i*4, &sh2s[0])); + if ((i+1) % 8 == 0) printf("\n"); + } + printf("stack msh2: %lx\n", (ulong)sh2s[0].r[15]); + for (i = -0x30; i < 0x30; i++) { + printf("%08lx ",(ulong)p32x_sh2_read32(sh2s[0].r[15] + i*4, &sh2s[0])); + if ((i+1) % 8 == 0) printf("\n"); + } + SH2_DUMP(&sh2s[1], "slave"); + printf("VBR ssh2: %lx\n", (ulong)sh2s[1].vbr); + for (i = 0; i < 0x60; i++) { + printf("%08lx ",(ulong)p32x_sh2_read32(sh2s[1].vbr + i*4, &sh2s[1])); + if ((i+1) % 8 == 0) printf("\n"); + } + printf("stack ssh2: %lx\n", (ulong)sh2s[1].r[15]); + for (i = -0x30; i < 0x30; i++) { + printf("%08lx ",(ulong)p32x_sh2_read32(sh2s[1].r[15] + i*4, &sh2s[1])); + if ((i+1) % 8 == 0) printf("\n"); + } +#endif +} + +static void bcache_stats(void) +{ +#if (DRC_DEBUG & 128) + int i; +#if CALL_STACK + for (i = 1; i < ARRAY_SIZE(sh2s->rts_cache); i++) + if (sh2s[0].rts_cache[i].pc == -1 && sh2s[1].rts_cache[i].pc == -1) break; + + printf("return cache hits:%d misses:%d depth: %d index: %d/%d\n", rchit, rcmiss, i,sh2s[0].rts_cache_idx,sh2s[1].rts_cache_idx); + for (i = 0; i < ARRAY_SIZE(sh2s[0].rts_cache); i++) { + printf("%08lx 
",(ulong)sh2s[0].rts_cache[i].pc); + if ((i+1) % 8 == 0) printf("\n"); + } + for (i = 0; i < ARRAY_SIZE(sh2s[1].rts_cache); i++) { + printf("%08lx ",(ulong)sh2s[1].rts_cache[i].pc); + if ((i+1) % 8 == 0) printf("\n"); + } +#endif +#if BRANCH_CACHE + printf("branch cache hits:%d misses:%d\n", bchit, bcmiss); + printf("branch cache master:\n"); + for (i = 0; i < ARRAY_SIZE(sh2s[0].branch_cache); i++) { + printf("%08lx ",(ulong)sh2s[0].branch_cache[i].pc); + if ((i+1) % 8 == 0) printf("\n"); + } + printf("branch cache slave:\n"); + for (i = 0; i < ARRAY_SIZE(sh2s[1].branch_cache); i++) { + printf("%08lx ",(ulong)sh2s[1].branch_cache[i].pc); + if ((i+1) % 8 == 0) printf("\n"); + } +#endif +#endif +} void sh2_drc_flush_all(void) { + backtrace(); + state_dump(); block_stats(); - flush_tcache(0); - flush_tcache(1); - flush_tcache(2); + entry_stats(); + bcache_stats(); + dr_flush_tcache(0); + dr_flush_tcache(1); + dr_flush_tcache(2); + Pico32x.emu_flags &= ~P32XF_DRC_ROM_C; } void sh2_drc_mem_setup(SH2 *sh2) { - // fill the convenience pointers - sh2->p_bios = sh2->is_slave ? 
Pico32xMem->sh2_rom_s.w : Pico32xMem->sh2_rom_m.w; - sh2->p_da = sh2->data_array; - sh2->p_sdram = Pico32xMem->sdram; - sh2->p_rom = Pico.rom; -} - -void sh2_drc_frame(void) -{ - if (literal_disabled_frames > 0) - literal_disabled_frames--; + // fill the DRC-only convenience pointers + sh2->p_drcblk_da = Pico32xMem->drcblk_da[!!sh2->is_slave]; + sh2->p_drcblk_ram = Pico32xMem->drcblk_ram; } int sh2_drc_init(SH2 *sh2) @@ -3199,39 +5927,62 @@ int sh2_drc_init(SH2 *sh2) if (block_tables[0] == NULL) { for (i = 0; i < TCACHE_BUFFERS; i++) { - block_tables[i] = calloc(block_max_counts[i], sizeof(*block_tables[0])); + block_tables[i] = calloc(BLOCK_MAX_COUNT(i), sizeof(*block_tables[0])); if (block_tables[i] == NULL) goto fail; - // max 2 block links (exits) per block - block_link_pool[i] = calloc(block_link_pool_max_counts[i], + entry_tables[i] = calloc(ENTRY_MAX_COUNT(i), sizeof(*entry_tables[0])); + if (entry_tables[i] == NULL) + goto fail; + block_link_pool[i] = calloc(BLOCK_LINK_MAX_COUNT(i), sizeof(*block_link_pool[0])); if (block_link_pool[i] == NULL) goto fail; - inval_lookup[i] = calloc(ram_sizes[i] / INVAL_PAGE_SIZE, + inval_lookup[i] = calloc(RAM_SIZE(i) / INVAL_PAGE_SIZE, sizeof(inval_lookup[0])); if (inval_lookup[i] == NULL) goto fail; - hash_tables[i] = calloc(hash_table_sizes[i], sizeof(*hash_tables[0])); + hash_tables[i] = calloc(HASH_TABLE_SIZE(i), sizeof(*hash_tables[0])); if (hash_tables[i] == NULL) goto fail; + + unresolved_links[i] = calloc(HASH_TABLE_SIZE(i), sizeof(*unresolved_links[0])); + if (unresolved_links[i] == NULL) + goto fail; +//atexit(sh2_drc_finish); + + RING_INIT(&block_ring[i], block_tables[i], BLOCK_MAX_COUNT(i)); + RING_INIT(&entry_ring[i], entry_tables[i], ENTRY_MAX_COUNT(i)); } - memset(block_counts, 0, sizeof(block_counts)); + + block_list_pool = calloc(BLOCK_LIST_MAX_COUNT, sizeof(*block_list_pool)); + if (block_list_pool == NULL) + goto fail; + block_list_pool_count = 0; + blist_free = NULL; + memset(block_link_pool_counts, 0, 
sizeof(block_link_pool_counts)); + memset(blink_free, 0, sizeof(blink_free)); drc_cmn_init(); + rcache_init(); + tcache_ptr = tcache; sh2_generate_utils(); - host_instructions_updated(tcache, tcache_ptr); + host_instructions_updated(tcache, tcache_ptr, 1); + emith_update_cache(); - tcache_bases[0] = tcache_ptrs[0] = tcache_ptr; - for (i = 1; i < ARRAY_SIZE(tcache_bases); i++) - tcache_bases[i] = tcache_ptrs[i] = tcache_bases[i - 1] + tcache_sizes[i - 1]; + i = tcache_ptr - tcache; + RING_INIT(&tcache_ring[0], tcache_ptr, tcache_sizes[0] - i); + for (i = 1; i < ARRAY_SIZE(tcache_ring); i++) { + RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_ring[i-1].size, + tcache_sizes[i]); + } #if (DRC_DEBUG & 4) for (i = 0; i < ARRAY_SIZE(block_tables); i++) - tcache_dsm_ptrs[i] = tcache_bases[i]; + tcache_dsm_ptrs[i] = tcache_ring[i].base; // disasm the utils tcache_dsm_ptrs[0] = tcache; do_host_disasm(0); @@ -3240,6 +5991,9 @@ int sh2_drc_init(SH2 *sh2) hash_collisions = 0; #endif } + memset(sh2->branch_cache, -1, sizeof(sh2->branch_cache)); + memset(sh2->rts_cache, -1, sizeof(sh2->rts_cache)); + sh2->rts_cache_idx = 0; return 0; @@ -3255,24 +6009,49 @@ void sh2_drc_finish(SH2 *sh2) if (block_tables[0] == NULL) return; +#if (DRC_DEBUG & (256|512)) + if (trace[0]) fclose(trace[0]); + if (trace[1]) fclose(trace[1]); + trace[0] = trace[1] = NULL; +#endif + +#if (DRC_DEBUG & 4) + for (i = 0; i < TCACHE_BUFFERS; i++) { + printf("~~~ tcache %d\n", i); +#if 0 + if (tcache_ring[i].first < tcache_ring[i].next) { + tcache_dsm_ptrs[i] = tcache_ring[i].first; + tcache_ptr = tcache_ring[i].next; + do_host_disasm(i); + } else if (tcache_ring[i].used) { + tcache_dsm_ptrs[i] = tcache_ring[i].first; + tcache_ptr = tcache_ring[i].base + tcache_ring[i].size; + do_host_disasm(i); + tcache_dsm_ptrs[i] = tcache_ring[i].base; + tcache_ptr = tcache_ring[i].next; + do_host_disasm(i); + } +#endif + printf("max links: %d\n", block_link_pool_counts[i]); + } + printf("max block list: %d\n", 
block_list_pool_count); +#endif + sh2_drc_flush_all(); for (i = 0; i < TCACHE_BUFFERS; i++) { -#if (DRC_DEBUG & 4) - printf("~~~ tcache %d\n", i); - tcache_dsm_ptrs[i] = tcache_bases[i]; - tcache_ptr = tcache_ptrs[i]; - do_host_disasm(i); -#endif - if (block_tables[i] != NULL) free(block_tables[i]); block_tables[i] = NULL; - if (block_link_pool[i] == NULL) + if (entry_tables[i] != NULL) + free(entry_tables[i]); + entry_tables[i] = NULL; + if (block_link_pool[i] != NULL) free(block_link_pool[i]); block_link_pool[i] = NULL; + blink_free[i] = NULL; - if (inval_lookup[i] == NULL) + if (inval_lookup[i] != NULL) free(inval_lookup[i]); inval_lookup[i] = NULL; @@ -3280,61 +6059,57 @@ void sh2_drc_finish(SH2 *sh2) free(hash_tables[i]); hash_tables[i] = NULL; } + + if (unresolved_links[i] != NULL) { + free(unresolved_links[i]); + unresolved_links[i] = NULL; + } } + if (block_list_pool != NULL) + free(block_list_pool); + block_list_pool = NULL; + blist_free = NULL; + drc_cmn_cleanup(); } #endif /* DRC_SH2 */ -static void *dr_get_pc_base(u32 pc, int is_slave) +static void *dr_get_pc_base(u32 pc, SH2 *sh2) { - void *ret = NULL; + void *ret; u32 mask = 0; - if ((pc & ~0x7ff) == 0) { - // BIOS - ret = is_slave ? 
Pico32xMem->sh2_rom_s.w : Pico32xMem->sh2_rom_m.w; - mask = 0x7ff; - } - else if ((pc & 0xfffff000) == 0xc0000000) { - // data array - ret = sh2s[is_slave].data_array; - mask = 0xfff; - } - else if ((pc & 0xc6000000) == 0x06000000) { - // SDRAM - ret = Pico32xMem->sdram; - mask = 0x03ffff; - } - else if ((pc & 0xc6000000) == 0x02000000) { - // ROM - if ((pc & 0x3fffff) < Pico.romsize) - ret = Pico.rom; - mask = 0x3fffff; - } - - if (ret == NULL) - return (void *)-1; // NULL is valid value + ret = p32x_sh2_get_mem_ptr(pc, &mask, sh2); + if (ret == (void *)-1) + return ret; return (char *)ret - (pc & ~mask); } -void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, - u32 *end_literals_out) +u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, + u32 *base_literals_out, u32 *end_literals_out) { u16 *dr_pc_base; u32 pc, op, tmp; u32 end_pc, end_literals = 0; + u32 lowest_literal = 0; u32 lowest_mova = 0; struct op_data *opd; int next_is_delay = 0; int end_block = 0; - int i, i_end; + int is_divop; + int i, i_end, i_div = -1; + u32 crc = 0; + // 2nd pass stuff + int last_btarget; // loop detector + enum { T_UNKNOWN, T_CLEAR, T_SET } t; // T propagation state - memset(op_flags, 0, BLOCK_INSN_LIMIT); + memset(op_flags, 0, sizeof(*op_flags) * BLOCK_INSN_LIMIT); + op_flags[0] |= OF_BTARGET; // block start is always a target - dr_pc_base = dr_get_pc_base(base_pc, is_slave); + dr_pc_base = dr_get_pc_base(base_pc, &sh2s[!!is_slave]); // 1st pass: disassemble for (i = 0, pc = base_pc; ; i++, pc += 2) { @@ -3353,7 +6128,11 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, } else if (end_block || i >= BLOCK_INSN_LIMIT - 2) break; + else if ((lowest_mova && lowest_mova <= pc) || + (lowest_literal && lowest_literal <= pc)) + break; // text area collides with data area + is_divop = 0; op = FETCH_OP(pc); switch ((op & 0xf000) >> 12) { @@ -3365,19 +6144,19 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 
*end_pc_out, switch (GET_Fx()) { case 0: // STC SR,Rn 0000nnnn00000010 - tmp = SHR_SR; + tmp = BITMASK2(SHR_SR, SHR_T); break; case 1: // STC GBR,Rn 0000nnnn00010010 - tmp = SHR_GBR; + tmp = BITMASK1(SHR_GBR); break; case 2: // STC VBR,Rn 0000nnnn00100010 - tmp = SHR_VBR; + tmp = BITMASK1(SHR_VBR); break; default: goto undefined; } opd->op = OP_MOVE; - opd->source = BITMASK1(tmp); + opd->source = tmp; opd->dest = BITMASK1(GET_Rn()); break; case 0x03: @@ -3386,18 +6165,22 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, // BSRF Rm 0000mmmm00000011 opd->op = OP_BRANCH_RF; opd->rm = GET_Rn(); - opd->source = BITMASK1(opd->rm); + opd->source = BITMASK2(SHR_PC, opd->rm); opd->dest = BITMASK1(SHR_PC); if (!(op & 0x20)) opd->dest |= BITMASK1(SHR_PR); opd->cycles = 2; next_is_delay = 1; - end_block = 1; + if (!(opd->dest & BITMASK1(SHR_PR))) + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); + else + op_flags[i+1+next_is_delay] |= OF_BTARGET; break; case 0x04: // MOV.B Rm,@(R0,Rn) 0000nnnnmmmm0100 case 0x05: // MOV.W Rm,@(R0,Rn) 0000nnnnmmmm0101 case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110 opd->source = BITMASK3(GET_Rm(), SHR_R0, GET_Rn()); + opd->dest = BITMASK1(SHR_MEM); break; case 0x07: // MUL.L Rm,Rn 0000nnnnmmmm0111 @@ -3420,7 +6203,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->imm = 1; break; case 2: // CLRMAC 0000000000101000 - opd->dest = BITMASK3(SHR_T, SHR_MACL, SHR_MACH); + opd->dest = BITMASK2(SHR_MACL, SHR_MACH); break; default: goto undefined; @@ -3434,7 +6217,12 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, break; case 1: // DIV0U 0000000000011001 CHECK_UNHANDLED_BITS(0xf00, undefined); + opd->op = OP_DIV0; + opd->source = BITMASK1(SHR_SR); opd->dest = BITMASK2(SHR_SR, SHR_T); + div(opd) = (struct div){ .rn=SHR_MEM, .rm=SHR_MEM, .ro=SHR_MEM }; + i_div = i; + is_divop = 1; break; case 2: // MOVT Rn 0000nnnn00101001 opd->source = BITMASK1(SHR_T); @@ 
-3474,19 +6262,20 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest = BITMASK1(SHR_PC); opd->cycles = 2; next_is_delay = 1; - end_block = 1; + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); break; case 1: // SLEEP 0000000000011011 opd->op = OP_SLEEP; + opd->cycles = 3; end_block = 1; break; case 2: // RTE 0000000000101011 opd->op = OP_RTE; opd->source = BITMASK1(SHR_SP); - opd->dest = BITMASK2(SHR_SR, SHR_PC); + opd->dest = BITMASK4(SHR_SP, SHR_SR, SHR_T, SHR_PC); opd->cycles = 4; next_is_delay = 1; - end_block = 1; + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); break; default: goto undefined; @@ -3495,11 +6284,12 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100 case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101 case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110 - opd->source = BITMASK2(GET_Rm(), SHR_R0); + opd->source = BITMASK3(GET_Rm(), SHR_R0, SHR_MEM); opd->dest = BITMASK1(GET_Rn()); + op_flags[i] |= OF_POLL_INSN; break; case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111 - opd->source = BITMASK5(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH); + opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM); opd->dest = BITMASK4(GET_Rm(), GET_Rn(), SHR_MACL, SHR_MACH); opd->cycles = 3; break; @@ -3511,8 +6301,8 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, ///////////////////////////////////////////// case 0x01: // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd - opd->source = BITMASK1(GET_Rm()); - opd->source = BITMASK1(GET_Rn()); + opd->source = BITMASK2(GET_Rm(), GET_Rn()); + opd->dest = BITMASK1(SHR_MEM); opd->imm = (op & 0x0f) * 4; break; @@ -3523,18 +6313,22 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x00: // MOV.B Rm,@Rn 0010nnnnmmmm0000 case 0x01: // MOV.W Rm,@Rn 0010nnnnmmmm0001 case 0x02: // MOV.L Rm,@Rn 0010nnnnmmmm0010 - opd->source = 
BITMASK1(GET_Rm()); - opd->source = BITMASK1(GET_Rn()); + opd->source = BITMASK2(GET_Rm(), GET_Rn()); + opd->dest = BITMASK1(SHR_MEM); break; case 0x04: // MOV.B Rm,@-Rn 0010nnnnmmmm0100 case 0x05: // MOV.W Rm,@-Rn 0010nnnnmmmm0101 case 0x06: // MOV.L Rm,@-Rn 0010nnnnmmmm0110 opd->source = BITMASK2(GET_Rm(), GET_Rn()); - opd->dest = BITMASK1(GET_Rn()); + opd->dest = BITMASK2(GET_Rn(), SHR_MEM); break; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 - opd->source = BITMASK2(GET_Rm(), GET_Rn()); - opd->dest = BITMASK1(SHR_SR); + opd->op = OP_DIV0; + opd->source = BITMASK3(SHR_SR, GET_Rm(), GET_Rn()); + opd->dest = BITMASK2(SHR_SR, SHR_T); + div(opd) = (struct div){ .rn=GET_Rn(), .rm=GET_Rm(), .ro=SHR_MEM }; + i_div = i; + is_divop = 1; break; case 0x08: // TST Rm,Rn 0010nnnnmmmm1000 opd->source = BITMASK2(GET_Rm(), GET_Rn()); @@ -3577,8 +6371,21 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest = BITMASK1(SHR_T); break; case 0x04: // DIV1 Rm,Rn 0011nnnnmmmm0100 - opd->source = BITMASK3(GET_Rm(), GET_Rn(), SHR_SR); - opd->dest = BITMASK2(GET_Rn(), SHR_SR); + opd->source = BITMASK4(GET_Rm(), GET_Rn(), SHR_SR, SHR_T); + opd->dest = BITMASK3(GET_Rn(), SHR_SR, SHR_T); + if (i_div >= 0) { + // divide operation: all DIV1 operations must use the same reg pair + if (div(&ops[i_div]).rn == SHR_MEM) + div(&ops[i_div]).rn=GET_Rn(), div(&ops[i_div]).rm=GET_Rm(); + if (div(&ops[i_div]).rn == GET_Rn() && div(&ops[i_div]).rm == GET_Rm()) { + div(&ops[i_div]).div1 += 1; + div(&ops[i_div]).state = 0; + is_divop = 1; + } else { + ops[i_div].imm = 0; + i_div = -1; + } + } break; case 0x05: // DMULU.L Rm,Rn 0011nnnnmmmm0101 case 0x0d: // DMULS.L Rm,Rn 0011nnnnmmmm1101 @@ -3621,6 +6428,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 1: // DT Rn 0100nnnn00010000 opd->source = BITMASK1(GET_Rn()); opd->dest = BITMASK2(GET_Rn(), SHR_T); + op_flags[i] |= OF_DELAY_INSN; break; default: goto undefined; @@ -3647,31 +6455,31 @@ 
void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, switch (op & 0x3f) { case 0x02: // STS.L MACH,@-Rn 0100nnnn00000010 - tmp = SHR_MACH; + tmp = BITMASK1(SHR_MACH); break; case 0x12: // STS.L MACL,@-Rn 0100nnnn00010010 - tmp = SHR_MACL; + tmp = BITMASK1(SHR_MACL); break; case 0x22: // STS.L PR,@-Rn 0100nnnn00100010 - tmp = SHR_PR; + tmp = BITMASK1(SHR_PR); break; case 0x03: // STC.L SR,@-Rn 0100nnnn00000011 - tmp = SHR_SR; + tmp = BITMASK2(SHR_SR, SHR_T); opd->cycles = 2; break; case 0x13: // STC.L GBR,@-Rn 0100nnnn00010011 - tmp = SHR_GBR; + tmp = BITMASK1(SHR_GBR); opd->cycles = 2; break; case 0x23: // STC.L VBR,@-Rn 0100nnnn00100011 - tmp = SHR_VBR; + tmp = BITMASK1(SHR_VBR); opd->cycles = 2; break; default: goto undefined; } - opd->source = BITMASK2(GET_Rn(), tmp); - opd->dest = BITMASK1(GET_Rn()); + opd->source = BITMASK1(GET_Rn()) | tmp; + opd->dest = BITMASK2(GET_Rn(), SHR_MEM); break; case 0x04: case 0x05: @@ -3683,6 +6491,19 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest = BITMASK2(GET_Rn(), SHR_T); break; case 0x24: // ROTCL Rn 0100nnnn00100100 + if (i_div >= 0) { + // divide operation: all ROTCL operations must use the same register + if (div(&ops[i_div]).ro == SHR_MEM) + div(&ops[i_div]).ro = GET_Rn(); + if (div(&ops[i_div]).ro == GET_Rn() && !div(&ops[i_div]).state) { + div(&ops[i_div]).rotcl += 1; + div(&ops[i_div]).state = 1; + is_divop = 1; + } else { + ops[i_div].imm = 0; + i_div = -1; + } + } case 0x25: // ROTCR Rn 0100nnnn00100101 opd->source = BITMASK2(GET_Rn(), SHR_T); opd->dest = BITMASK2(GET_Rn(), SHR_T); @@ -3700,31 +6521,34 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, switch (op & 0x3f) { case 0x06: // LDS.L @Rm+,MACH 0100mmmm00000110 - tmp = SHR_MACH; + tmp = BITMASK1(SHR_MACH); break; case 0x16: // LDS.L @Rm+,MACL 0100mmmm00010110 - tmp = SHR_MACL; + tmp = BITMASK1(SHR_MACL); break; case 0x26: // LDS.L @Rm+,PR 0100mmmm00100110 - tmp = SHR_PR; + 
tmp = BITMASK1(SHR_PR); break; case 0x07: // LDC.L @Rm+,SR 0100mmmm00000111 - tmp = SHR_SR; + tmp = BITMASK2(SHR_SR, SHR_T); + opd->op = OP_LDC; opd->cycles = 3; break; case 0x17: // LDC.L @Rm+,GBR 0100mmmm00010111 - tmp = SHR_GBR; + tmp = BITMASK1(SHR_GBR); + opd->op = OP_LDC; opd->cycles = 3; break; case 0x27: // LDC.L @Rm+,VBR 0100mmmm00100111 - tmp = SHR_VBR; + tmp = BITMASK1(SHR_VBR); + opd->op = OP_LDC; opd->cycles = 3; break; default: goto undefined; } - opd->source = BITMASK1(GET_Rn()); - opd->dest = BITMASK2(GET_Rn(), tmp); + opd->source = BITMASK2(GET_Rn(), SHR_MEM); + opd->dest = BITMASK1(GET_Rn()) | tmp; break; case 0x08: case 0x09: @@ -3779,11 +6603,14 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, opd->dest |= BITMASK1(SHR_PC); opd->cycles = 2; next_is_delay = 1; - end_block = 1; + if (!(opd->dest & BITMASK1(SHR_PR))) + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); + else + op_flags[i+1+next_is_delay] |= OF_BTARGET; break; case 1: // TAS.B @Rn 0100nnnn00011011 - opd->source = BITMASK1(GET_Rn()); - opd->dest = BITMASK1(SHR_T); + opd->source = BITMASK2(GET_Rn(), SHR_MEM); + opd->dest = BITMASK2(SHR_T, SHR_MEM); opd->cycles = 4; break; default: @@ -3794,24 +6621,24 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, switch (GET_Fx()) { case 0: // LDC Rm,SR 0100mmmm00001110 - tmp = SHR_SR; + tmp = BITMASK2(SHR_SR, SHR_T); break; case 1: // LDC Rm,GBR 0100mmmm00011110 - tmp = SHR_GBR; + tmp = BITMASK1(SHR_GBR); break; case 2: // LDC Rm,VBR 0100mmmm00101110 - tmp = SHR_VBR; + tmp = BITMASK1(SHR_VBR); break; default: goto undefined; } - opd->op = OP_MOVE; + opd->op = OP_LDC; opd->source = BITMASK1(GET_Rn()); - opd->dest = BITMASK1(tmp); + opd->dest = tmp; break; case 0x0f: // MAC.W @Rm+,@Rn+ 0100nnnnmmmm1111 - opd->source = BITMASK5(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH); + opd->source = BITMASK6(GET_Rm(), GET_Rn(), SHR_SR, SHR_MACL, SHR_MACH, SHR_MEM); opd->dest = BITMASK4(GET_Rm(), 
GET_Rn(), SHR_MACL, SHR_MACH); opd->cycles = 3; break; @@ -3823,9 +6650,10 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, ///////////////////////////////////////////// case 0x05: // MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd - opd->source = BITMASK1(GET_Rm()); + opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(GET_Rn()); opd->imm = (op & 0x0f) * 4; + op_flags[i] |= OF_POLL_INSN; break; ///////////////////////////////////////////// @@ -3835,12 +6663,15 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x04: // MOV.B @Rm+,Rn 0110nnnnmmmm0100 case 0x05: // MOV.W @Rm+,Rn 0110nnnnmmmm0101 case 0x06: // MOV.L @Rm+,Rn 0110nnnnmmmm0110 - opd->dest = BITMASK1(GET_Rm()); + opd->dest = BITMASK2(GET_Rm(), GET_Rn()); + opd->source = BITMASK2(GET_Rm(), SHR_MEM); + break; case 0x00: // MOV.B @Rm,Rn 0110nnnnmmmm0000 case 0x01: // MOV.W @Rm,Rn 0110nnnnmmmm0001 case 0x02: // MOV.L @Rm,Rn 0110nnnnmmmm0010 - opd->source = BITMASK1(GET_Rm()); - opd->dest |= BITMASK1(GET_Rn()); + opd->dest = BITMASK1(GET_Rn()); + opd->source = BITMASK2(GET_Rm(), SHR_MEM); + op_flags[i] |= OF_POLL_INSN; break; case 0x0a: // NEGC Rm,Rn 0110nnnnmmmm1010 opd->source = BITMASK2(GET_Rm(), SHR_T); @@ -3868,7 +6699,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x07: // ADD #imm,Rn 0111nnnniiiiiiii opd->source = opd->dest = BITMASK1(GET_Rn()); - opd->imm = (int)(signed char)op; + opd->imm = (s8)op; break; ///////////////////////////////////////////// @@ -3877,26 +6708,30 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, { case 0x0000: // MOV.B R0,@(disp,Rn) 10000000nnnndddd opd->source = BITMASK2(GET_Rm(), SHR_R0); + opd->dest = BITMASK1(SHR_MEM); opd->imm = (op & 0x0f); break; case 0x0100: // MOV.W R0,@(disp,Rn) 10000001nnnndddd opd->source = BITMASK2(GET_Rm(), SHR_R0); + opd->dest = BITMASK1(SHR_MEM); opd->imm = (op & 0x0f) * 2; break; case 0x0400: // MOV.B @(disp,Rm),R0 
10000100mmmmdddd - opd->source = BITMASK1(GET_Rm()); + opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(SHR_R0); opd->imm = (op & 0x0f); + op_flags[i] |= OF_POLL_INSN; break; case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd - opd->source = BITMASK1(GET_Rm()); + opd->source = BITMASK2(GET_Rm(), SHR_MEM); opd->dest = BITMASK1(SHR_R0); opd->imm = (op & 0x0f) * 2; + op_flags[i] |= OF_POLL_INSN; break; case 0x0800: // CMP/EQ #imm,R0 10001000iiiiiiii opd->source = BITMASK1(SHR_R0); opd->dest = BITMASK1(SHR_T); - opd->imm = (int)(signed char)op; + opd->imm = (s8)op; break; case 0x0d00: // BT/S label 10001101dddddddd case 0x0f00: // BF/S label 10001111dddddddd @@ -3905,7 +6740,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0900: // BT label 10001001dddddddd case 0x0b00: // BF label 10001011dddddddd opd->op = (op & 0x0200) ? OP_BRANCH_CF : OP_BRANCH_CT; - opd->source = BITMASK1(SHR_T); + opd->source = BITMASK2(SHR_PC, SHR_T); opd->dest = BITMASK1(SHR_PC); opd->imm = ((signed int)(op << 24) >> 23); opd->imm += pc + 4; @@ -3925,13 +6760,16 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, if (op_flags[i] & OF_DELAY_OP) { if (ops[i-1].op == OP_BRANCH) tmp = ops[i-1].imm; - else + else if (ops[i-1].op != OP_BRANCH_N) tmp = 0; } - opd->source = BITMASK1(SHR_PC); + opd->source = BITMASK2(SHR_PC, SHR_MEM); opd->dest = BITMASK1(GET_Rn()); - if (tmp) + if (tmp) { opd->imm = tmp + 2 + (op & 0xff) * 2; + if (lowest_literal == 0 || opd->imm < lowest_literal) + lowest_literal = opd->imm; + } opd->size = 1; break; @@ -3942,14 +6780,21 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0a: // BRA label 1010dddddddddddd opd->op = OP_BRANCH; + opd->source = BITMASK1(SHR_PC); opd->dest |= BITMASK1(SHR_PC); opd->imm = ((signed int)(op << 20) >> 19); opd->imm += pc + 4; opd->cycles = 2; next_is_delay = 1; - end_block = 1; - if (base_pc <= opd->imm && opd->imm < base_pc + 
BLOCK_INSN_LIMIT * 2) - op_flags[(opd->imm - base_pc) / 2] |= OF_BTARGET; + if (!(opd->dest & BITMASK1(SHR_PR))) { + if (base_pc <= opd->imm && opd->imm < base_pc + BLOCK_INSN_LIMIT * 2) { + op_flags[(opd->imm - base_pc) / 2] |= OF_BTARGET; + if (opd->imm <= pc) + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); + } else + end_block = !(op_flags[i+1+next_is_delay] & OF_BTARGET); + } else + op_flags[i+1+next_is_delay] |= OF_BTARGET; break; ///////////////////////////////////////////// @@ -3960,23 +6805,26 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, case 0x0100: // MOV.W R0,@(disp,GBR) 11000001dddddddd case 0x0200: // MOV.L R0,@(disp,GBR) 11000010dddddddd opd->source = BITMASK2(SHR_GBR, SHR_R0); + opd->dest = BITMASK1(SHR_MEM); opd->size = (op & 0x300) >> 8; opd->imm = (op & 0xff) << opd->size; break; case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd case 0x0600: // MOV.L @(disp,GBR),R0 11000110dddddddd - opd->source = BITMASK1(SHR_GBR); + opd->source = BITMASK2(SHR_GBR, SHR_MEM); opd->dest = BITMASK1(SHR_R0); opd->size = (op & 0x300) >> 8; opd->imm = (op & 0xff) << opd->size; + op_flags[i] |= OF_POLL_INSN; break; case 0x0300: // TRAPA #imm 11000011iiiiiiii - opd->source = BITMASK2(SHR_PC, SHR_SR); - opd->dest = BITMASK1(SHR_PC); - opd->imm = (op & 0xff) * 4; + opd->op = OP_TRAPA; + opd->source = BITMASK4(SHR_SP, SHR_PC, SHR_SR, SHR_T); + opd->dest = BITMASK2(SHR_SP, SHR_PC); + opd->imm = (op & 0xff); opd->cycles = 8; - end_block = 1; // FIXME + op_flags[i+1] |= OF_BTARGET; break; case 0x0700: // MOVA @(disp,PC),R0 11000111dddddddd opd->op = OP_MOVA; @@ -3984,7 +6832,7 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, if (op_flags[i] & OF_DELAY_OP) { if (ops[i-1].op == OP_BRANCH) tmp = ops[i-1].imm; - else + else if (ops[i-1].op != OP_BRANCH_N) tmp = 0; } opd->dest = BITMASK1(SHR_R0); @@ -4014,15 +6862,17 @@ void scan_block(u32 base_pc, int 
is_slave, u8 *op_flags, u32 *end_pc_out, opd->imm = op & 0xff; break; case 0x0c00: // TST.B #imm,@(R0,GBR) 11001100iiiiiiii - opd->source = BITMASK2(SHR_GBR, SHR_R0); + opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM); opd->dest = BITMASK1(SHR_T); opd->imm = op & 0xff; + op_flags[i] |= OF_POLL_INSN; opd->cycles = 3; break; case 0x0d00: // AND.B #imm,@(R0,GBR) 11001101iiiiiiii case 0x0e00: // XOR.B #imm,@(R0,GBR) 11001110iiiiiiii case 0x0f00: // OR.B #imm,@(R0,GBR) 11001111iiiiiiii - opd->source = BITMASK2(SHR_GBR, SHR_R0); + opd->source = BITMASK3(SHR_GBR, SHR_R0, SHR_MEM); + opd->dest = BITMASK1(SHR_MEM); opd->imm = op & 0xff; opd->cycles = 3; break; @@ -4039,88 +6889,210 @@ void scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc_out, if (op_flags[i] & OF_DELAY_OP) { if (ops[i-1].op == OP_BRANCH) tmp = ops[i-1].imm; - else + else if (ops[i-1].op != OP_BRANCH_N) tmp = 0; } - opd->source = BITMASK1(SHR_PC); + opd->source = BITMASK2(SHR_PC, SHR_MEM); opd->dest = BITMASK1(GET_Rn()); - if (tmp) + if (tmp) { opd->imm = (tmp + 2 + (op & 0xff) * 4) & ~3; + if (lowest_literal == 0 || opd->imm < lowest_literal) + lowest_literal = opd->imm; + } opd->size = 2; break; ///////////////////////////////////////////// case 0x0e: // MOV #imm,Rn 1110nnnniiiiiiii + opd->op = OP_LOAD_CONST; opd->dest = BITMASK1(GET_Rn()); - opd->imm = (u32)(signed int)(signed char)op; + opd->imm = (s8)op; break; default: undefined: - elprintf(EL_ANOMALY, "%csh2 drc: unhandled op %04x @ %08x", - is_slave ? 's' : 'm', op, pc); + opd->op = OP_UNDEFINED; + // an unhandled instruction is probably not code if it's not the 1st insn + if (!(op_flags[i] & OF_DELAY_OP) && pc != base_pc) + goto end; break; } + + if (op_flags[i] & OF_DELAY_OP) { + switch (opd->op) { + case OP_BRANCH: + case OP_BRANCH_N: + case OP_BRANCH_CT: + case OP_BRANCH_CF: + case OP_BRANCH_R: + case OP_BRANCH_RF: + elprintf(EL_ANOMALY, "%csh2 drc: branch in DS @ %08x", + is_slave ? 
's' : 'm', pc); + opd->op = OP_UNDEFINED; + op_flags[i] |= OF_B_IN_DS; + next_is_delay = 0; + break; + } + } else if (!is_divop && i_div >= 0) + i_div = -1; // divide parser stop } +end: i_end = i; end_pc = pc; // 2nd pass: some analysis - for (i = 0; i < i_end; i++) { + lowest_literal = end_literals = lowest_mova = 0; + t = T_UNKNOWN; // T flag state + last_btarget = 0; + op = 0; // delay/poll insns counter + is_divop = 0; // divide op insns counter + i_div = -1; // index of current divide op + for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { opd = &ops[i]; + crc += FETCH_OP(pc); // propagate T (TODO: DIV0U) - if ((opd->op == OP_SETCLRT && !opd->imm) || opd->op == OP_BRANCH_CT) - op_flags[i + 1] |= OF_T_CLEAR; - else if ((opd->op == OP_SETCLRT && opd->imm) || opd->op == OP_BRANCH_CF) - op_flags[i + 1] |= OF_T_SET; + if (op_flags[i] & OF_BTARGET) + t = T_UNKNOWN; - if ((op_flags[i] & OF_BTARGET) || (opd->dest & BITMASK1(SHR_T))) - op_flags[i] &= ~(OF_T_SET | OF_T_CLEAR); - else - op_flags[i + 1] |= op_flags[i] & (OF_T_SET | OF_T_CLEAR); - - if ((opd->op == OP_BRANCH_CT && (op_flags[i] & OF_T_SET)) - || (opd->op == OP_BRANCH_CF && (op_flags[i] & OF_T_CLEAR))) - { + if ((opd->op == OP_BRANCH_CT && t == T_SET) || + (opd->op == OP_BRANCH_CF && t == T_CLEAR)) { opd->op = OP_BRANCH; - opd->cycles = 3; - i_end = i + 1; + opd->cycles = (op_flags[i + 1] & OF_DELAY_OP) ? 2 : 3; + } else if ((opd->op == OP_BRANCH_CT && t == T_CLEAR) || + (opd->op == OP_BRANCH_CF && t == T_SET)) + opd->op = OP_BRANCH_N; + else if (OP_ISBRACND(opd->op)) + t = (opd->op == OP_BRANCH_CF ? T_SET : T_CLEAR); + else if (opd->op == OP_SETCLRT) + t = (opd->imm ? 
T_SET : T_CLEAR); + else if (opd->dest & BITMASK1(SHR_T)) + t = T_UNKNOWN; + + // "overscan" detection: unreachable code after unconditional branch + // this can happen if the insn after a forward branch isn't a local target + if (OP_ISBRAUC(opd->op)) { if (op_flags[i + 1] & OF_DELAY_OP) { - opd->cycles = 2; - i_end++; + if (i_end > i + 2 && !(op_flags[i + 2] & OF_BTARGET)) + i_end = i + 2; + } else { + if (i_end > i + 1 && !(op_flags[i + 1] & OF_BTARGET)) + i_end = i + 1; } } - else if (opd->op == OP_LOAD_POOL) - { - if (opd->imm < end_pc + MAX_LITERAL_OFFSET) { + + // divide operation verification: + // 1. there must not be a branch target inside + // 2. nothing is in a delay slot (could only be DIV0) + // 3. DIV0/n*(ROTCL+DIV1)/ROTCL: + // div.div1 > 0 && div.rotcl == div.div1+1 && div.rn != div.ro + // 4. DIV0/n*DIV1/ROTCL: + // div.div1 > 0 && div.rotcl == 1 && div.ro == div.rn + if (i_div >= 0) { + if (op_flags[i] & OF_BTARGET) { // condition 1 + ops[i_div].imm = 0; + i_div = -1; + } else if (--is_divop == 0) + i_div = -1; + } else if (opd->op == OP_DIV0) { + struct div *div = &div(opd); + is_divop = div->div1 + div->rotcl; + if (op_flags[i] & OF_DELAY_OP) // condition 2 + opd->imm = 0; + else if (! div->div1 || ! ((div->ro == div->rn && div->rotcl == 1) || + (div->ro != div->rn && div->rotcl == div->div1+1))) + opd->imm = 0; // condition 3+4 + else if (is_divop) + i_div = i; + } + + // literal pool size detection + if (opd->op == OP_MOVA && opd->imm >= base_pc) + if (lowest_mova == 0 || opd->imm < lowest_mova) + lowest_mova = opd->imm; + if (opd->op == OP_LOAD_POOL) { + if (opd->imm >= base_pc && opd->imm < end_pc + MAX_LITERAL_OFFSET) { if (end_literals < opd->imm + opd->size * 2) end_literals = opd->imm + opd->size * 2; + if (lowest_literal == 0 || lowest_literal > opd->imm) + lowest_literal = opd->imm; + if (opd->size == 2) { + // tweak for NFL: treat a 32bit literal as an address and check if it + // points to the literal space.
In that case handle it like MOVA. + tmp = FETCH32(opd->imm) & ~0x20000000; // MUST ignore wt bit here + if (tmp >= end_pc && tmp < end_pc + MAX_LITERAL_OFFSET) + if (lowest_mova == 0 || tmp < lowest_mova) + lowest_mova = tmp; + } } } +#if LOOP_DETECTION + // inner loop detection + // 1. a loop always starts with a branch target (for the backwards jump) + // 2. it doesn't contain more than one polling and/or delaying insn + // 3. it doesn't contain unconditional jumps + // 4. no overlapping of loops + if (op_flags[i] & OF_BTARGET) { + last_btarget = i; // possible loop starting point + op = 0; + } + // XXX let's hope nobody is putting a delay or poll insn in a delay slot :-/ + if (OP_ISBRAIMM(opd->op)) { + // BSR, BRA, BT, BF with immediate target + int i_tmp = (opd->imm - base_pc) / 2; // branch target, index in ops + if (i_tmp == last_btarget) // candidate for basic loop optimizer + op_flags[i_tmp] |= OF_BASIC_LOOP; + if (i_tmp == last_btarget && op <= 1) { + op_flags[i_tmp] |= OF_LOOP; // conditions met -> mark loop + last_btarget = i+1; // condition 4 + } else if (opd->op == OP_BRANCH) + last_btarget = i+1; // condition 3 + } + else if (OP_ISBRAIND(opd->op)) + // BRAF, BSRF, JMP, JSR, register indirect. treat it as off-limits jump + last_btarget = i+1; // condition 3 + else if (op_flags[i] & (OF_POLL_INSN|OF_DELAY_INSN)) + op ++; // condition 2 +#endif } - end_pc = base_pc + i_end * 2; - if (end_literals < end_pc) - end_literals = end_pc; + end_pc = pc; // end_literals is used to decide to inline a literal or not // XXX: need better detection if this actually is used in write + if (lowest_literal >= base_pc) { + if (lowest_literal < end_pc) { + dbg(1, "warning: lowest_literal=%08x < end_pc=%08x", lowest_literal, end_pc); + // TODO: does this always mean end_pc covers data? 
+ } + } if (lowest_mova >= base_pc) { if (lowest_mova < end_literals) { - dbg(1, "mova for %08x, block %08x", lowest_mova, base_pc); - end_literals = end_pc; + dbg(1, "warning: mova=%08x < end_literals=%08x", lowest_mova, end_literals); + end_literals = lowest_mova; } if (lowest_mova < end_pc) { - dbg(1, "warning: mova inside of blk for %08x, block %08x", - lowest_mova, base_pc); + dbg(1, "warning: mova=%08x < end_pc=%08x", lowest_mova, end_pc); end_literals = end_pc; } } + if (lowest_literal >= end_literals) + lowest_literal = end_literals; + + if (lowest_literal && end_literals) + for (pc = lowest_literal; pc < end_literals; pc += 2) + crc += FETCH_OP(pc); *end_pc_out = end_pc; + if (base_literals_out != NULL) + *base_literals_out = (lowest_literal ? lowest_literal : end_pc); if (end_literals_out != NULL) - *end_literals_out = end_literals; + *end_literals_out = (end_literals ? end_literals : end_pc); + + // crc overflow handling, twice to collect all overflows + crc = (crc & 0xffff) + (crc >> 16); + crc = (crc & 0xffff) + (crc >> 16); + return crc; } // vim:shiftwidth=2:ts=2:expandtab diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index ef1944b4..deddf8f6 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -1,26 +1,87 @@ int sh2_drc_init(SH2 *sh2); void sh2_drc_finish(SH2 *sh2); -void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid); -void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid); +void sh2_drc_wcheck_ram(u32 a, unsigned len, SH2 *sh2); +void sh2_drc_wcheck_da(u32 a, unsigned len, SH2 *sh2); #ifdef DRC_SH2 void sh2_drc_mem_setup(SH2 *sh2); void sh2_drc_flush_all(void); -void sh2_drc_frame(void); #else #define sh2_drc_mem_setup(x) #define sh2_drc_flush_all() #define sh2_drc_frame() #endif -#define BLOCK_INSN_LIMIT 128 +#define BLOCK_INSN_LIMIT 1024 /* op_flags */ #define OF_DELAY_OP (1 << 0) #define OF_BTARGET (1 << 1) -#define OF_T_SET (1 << 2) // T is known to be set -#define OF_T_CLEAR (1 << 3) // ... 
clear +#define OF_LOOP (3 << 2) // NONE, IDLE, DELAY, POLL loop +#define OF_B_IN_DS (1 << 4) +#define OF_DELAY_INSN (1 << 5) // DT, (TODO ADD+CMP?) +#define OF_POLL_INSN (1 << 6) // MOV @(...),Rn (no post increment), TST @(...) +#define OF_BASIC_LOOP (1 << 7) // pinnable loop without any branches in it -void scan_block(unsigned int base_pc, int is_slave, - unsigned char *op_flags, unsigned int *end_pc, - unsigned int *end_literals); +#define OF_IDLE_LOOP (1 << 2) +#define OF_DELAY_LOOP (2 << 2) +#define OF_POLL_LOOP (3 << 2) + +u16 scan_block(u32 base_pc, int is_slave, u8 *op_flags, u32 *end_pc, + u32 *base_literals, u32 *end_literals); + +#if defined(DRC_SH2) && defined(__GNUC__) && !defined(__clang__) +// direct access to some host CPU registers used by the DRC if gcc is used. +// XXX MUST match SHR_SR definitions in cpu/drc/emit_*.c; should be moved there +// XXX yuck, there's no portable way to determine register size. Use long long +// if target is 64 bit and data model is ILP32 or LLP64(windows), else long +#if defined(__arm__) +#define DRC_SR_REG "r10" +#define DRC_REG_LL 0 // 32 bit +#elif defined(__aarch64__) +#define DRC_SR_REG "r28" +#define DRC_REG_LL (__ILP32__ || _WIN32) +#elif defined(__mips__) +#define DRC_SR_REG "s6" +#define DRC_REG_LL (_MIPS_SZPTR > _MIPS_SZLONG) // (_MIPS_SIM == _ABIN32) +#elif defined(__riscv__) || defined(__riscv) +#define DRC_SR_REG "s11" +#define DRC_REG_LL 0 // no ABI for (__ILP32__ && __riscv_xlen != 32) +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) +#define DRC_SR_REG "r28" +#define DRC_REG_LL 0 // no ABI for __ILP32__ +//i386 only has 8 registers and reserving one of them causes too much spilling +//#elif defined(__i386__) +//#define DRC_SR_REG "edi" +//#define DRC_REG_LL 0 // 32 bit +#elif defined(__x86_64__) +#define DRC_SR_REG "rbx" +#define DRC_REG_LL (__ILP32__ || _WIN32) +#endif +#endif + +#ifdef DRC_SR_REG +// XXX this is more clear but produces too much overhead for slow platforms +extern 
void REGPARM(1) (*sh2_drc_save_sr)(SH2 *sh2); +extern void REGPARM(1) (*sh2_drc_restore_sr)(SH2 *sh2); + +// NB: sh2_sr MUST have register size if optimizing with -O3 (-fif-conversion) +#if DRC_REG_LL +#define DRC_DECLARE_SR register long long _sh2_sr asm(DRC_SR_REG) +#else +#define DRC_DECLARE_SR register long _sh2_sr asm(DRC_SR_REG) +#endif +// NB: save/load SR register only when DRC is executing and not in DMA access +#define DRC_SAVE_SR(sh2) \ + if (likely((sh2->state & (SH2_IN_DRC|SH2_STATE_SLEEP)) == SH2_IN_DRC)) \ + sh2->sr = (s32)_sh2_sr +// host_call(sh2_drc_save_sr, (SH2 *))(sh2) +#define DRC_RESTORE_SR(sh2) \ + if (likely((sh2->state & (SH2_IN_DRC|SH2_STATE_SLEEP)) == SH2_IN_DRC)) \ + _sh2_sr = (s32)sh2->sr +// host_call(sh2_drc_restore_sr, (SH2 *))(sh2) +#else +#define DRC_DECLARE_SR +#define DRC_SAVE_SR(sh2) +#define DRC_RESTORE_SR(sh2) +#endif diff --git a/cpu/sh2/mame/sh2.c b/cpu/sh2/mame/sh2.c index 81203e7b..17b96a31 100644 --- a/cpu/sh2/mame/sh2.c +++ b/cpu/sh2/mame/sh2.c @@ -108,6 +108,7 @@ //#include "debugger.h" //#include "sh2.h" //#include "sh2comn.h" +#undef INLINE #define INLINE static //CPU_DISASSEMBLE( sh2 ); @@ -371,7 +372,7 @@ INLINE void BRA(sh2_state *sh2, UINT32 d) #if BUSY_LOOP_HACKS if (disp == -2) { - UINT32 next_opcode = RW( sh2, sh2->ppc & AM ); + UINT32 next_opcode = (UINT32)(UINT16)RW( sh2, sh2->ppc & AM ); /* BRA $ * NOP */ @@ -799,9 +800,10 @@ INLINE void DT(sh2_state *sh2, UINT32 n) sh2->sr |= T; else sh2->sr &= ~T; + sh2->no_polling = SH2_NO_POLLING; #if BUSY_LOOP_HACKS { - UINT32 next_opcode = RW( sh2, sh2->ppc & AM ); + UINT32 next_opcode = (UINT32)(UINT16)RW( sh2, sh2->ppc & AM ); /* DT Rn * BF $-2 */ @@ -1048,12 +1050,12 @@ INLINE void MAC_W(sh2_state *sh2, UINT32 m, UINT32 n) INT32 tempm, tempn, dest, src, ans; UINT32 templ; - tempn = (INT32) RW( sh2, sh2->r[n] ); + tempn = (INT32)(INT16) RW( sh2, sh2->r[n] ); sh2->r[n] += 2; - tempm = (INT32) RW( sh2, sh2->r[m] ); + tempm = (INT32)(INT16) RW( sh2, sh2->r[m] ); 
sh2->r[m] += 2; templ = sh2->macl; - tempm = ((INT32) (short) tempn * (INT32) (short) tempm); + tempm = (tempn * tempm); if ((INT32) sh2->macl >= 0) dest = 0; else diff --git a/cpu/sh2/mame/sh2dasm.c b/cpu/sh2/mame/sh2dasm.c index 21a32e68..0ecb7f45 100644 --- a/cpu/sh2/mame/sh2dasm.c +++ b/cpu/sh2/mame/sh2dasm.c @@ -465,7 +465,7 @@ static UINT32 op1000(char *buffer, UINT32 pc, UINT16 opcode) sprintf(buffer, "MOV.B @($%02X,%s),R0", (opcode & 15), regname[Rm]); break; case 5: - sprintf(buffer, "MOV.W @($%02X,%s),R0", (opcode & 15), regname[Rm]); + sprintf(buffer, "MOV.W @($%02X,%s),R0", (opcode & 15) * 2, regname[Rm]); break; case 8: sprintf(buffer, "CMP/EQ #$%02X,R0", (opcode & 0xff)); @@ -610,6 +610,7 @@ unsigned DasmSH2(char *buffer, unsigned pc, UINT16 opcode) case 14: flags = op1110(buffer,pc,opcode); break; default: flags = op1111(buffer,pc,opcode); break; } + (void)flags; return 0;//2 | flags | DASMFLAG_SUPPORTED; } diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c index a3ad9f47..2c2ea406 100644 --- a/cpu/sh2/mame/sh2pico.c +++ b/cpu/sh2/mame/sh2pico.c @@ -1,7 +1,7 @@ #include "../sh2.h" #ifdef DRC_CMP -#include "../compiler.c" +#include "../compiler.h" #define BUSY_LOOP_HACKS 0 #else #define BUSY_LOOP_HACKS 1 @@ -9,34 +9,36 @@ // MAME types #ifndef INT8 -typedef signed char INT8; -typedef signed short INT16; -typedef signed int INT32; -typedef unsigned int UINT32; -typedef unsigned short UINT16; -typedef unsigned char UINT8; +typedef s8 INT8; +typedef s16 INT16; +typedef s32 INT32; +typedef u32 UINT32; +typedef u16 UINT16; +typedef u8 UINT8; #endif #ifdef DRC_SH2 // this nasty conversion is needed for drc-expecting memhandlers #define MAKE_READFUNC(name, cname) \ -static inline unsigned int name(SH2 *sh2, unsigned int a) \ +static __inline unsigned int name(SH2 *sh2, unsigned int a) \ { \ unsigned int ret; \ - sh2->sr |= sh2->icount << 12; \ + sh2->sr |= (sh2->icount << 12) | (sh2->no_polling); \ ret = cname(a, sh2); \ sh2->icount = (signed 
int)sh2->sr >> 12; \ + sh2->no_polling = (sh2->sr & SH2_NO_POLLING); \ sh2->sr &= 0x3f3; \ return ret; \ } #define MAKE_WRITEFUNC(name, cname) \ -static inline void name(SH2 *sh2, unsigned int a, unsigned int d) \ +static __inline void name(SH2 *sh2, unsigned int a, unsigned int d) \ { \ - sh2->sr |= sh2->icount << 12; \ + sh2->sr |= (sh2->icount << 12) | (sh2->no_polling); \ cname(a, d, sh2); \ sh2->icount = (signed int)sh2->sr >> 12; \ + sh2->no_polling = (sh2->sr & SH2_NO_POLLING); \ sh2->sr &= 0x3f3; \ } @@ -121,13 +123,25 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) if (sh2->delay) { sh2->ppc = sh2->delay; - opcode = RW(sh2, sh2->delay); + opcode = (UINT32)(UINT16)RW(sh2, sh2->delay); + + // TODO: more branch types + if ((opcode >> 13) == 5) { // BRA/BSR + sh2->r[15] -= 4; + WL(sh2, sh2->r[15], sh2->sr); + sh2->r[15] -= 4; + WL(sh2, sh2->r[15], sh2->pc); + sh2->pc = RL(sh2, sh2->vbr + 6 * 4); + sh2->icount -= 5; + opcode = 9; // NOP + } + sh2->pc -= 2; } else { sh2->ppc = sh2->pc; - opcode = RW(sh2, sh2->pc); + opcode = (UINT32)(UINT16)RW(sh2, sh2->pc); } sh2->delay = 0; @@ -155,14 +169,16 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) sh2->icount--; - if (sh2->test_irq && !sh2->delay && sh2->pending_level > ((sh2->sr >> 4) & 0x0f)) + if (sh2->test_irq && !sh2->delay) { int level = sh2->pending_level; - int vector = sh2->irq_callback(sh2, level); - sh2_do_irq(sh2, level, vector); + if (level > ((sh2->sr >> 4) & 0x0f)) + { + int vector = sh2->irq_callback(sh2, level); + sh2_do_irq(sh2, level, vector); + } sh2->test_irq = 0; } - } while (sh2->icount > 0 || sh2->delay); /* can't interrupt before delay */ @@ -202,14 +218,14 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) if (sh2->pc < *base_pc || sh2->pc >= *end_pc) { *base_pc = sh2->pc; scan_block(*base_pc, sh2->is_slave, - op_flags, end_pc, NULL); + op_flags, end_pc, NULL, NULL); } if ((op_flags[(sh2->pc - *base_pc) / 2] & OF_BTARGET) || sh2->pc == *base_pc || pc_expect != sh2->pc) // branched 
{ pc_expect = sh2->pc; - if (sh2->icount < 0) + if (sh2->icount <= 0) break; } @@ -220,13 +236,13 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) if (sh2->delay) { sh2->ppc = sh2->delay; - opcode = RW(sh2, sh2->delay); + opcode = (UINT32)(UINT16)RW(sh2, sh2->delay); sh2->pc -= 2; } else { sh2->ppc = sh2->pc; - opcode = RW(sh2, sh2->pc); + opcode = (UINT32)(UINT16)RW(sh2, sh2->pc); } sh2->delay = 0; diff --git a/cpu/sh2/sh2.c b/cpu/sh2/sh2.c index 23b8fc0a..0229a1f1 100644 --- a/cpu/sh2/sh2.c +++ b/cpu/sh2/sh2.c @@ -84,7 +84,7 @@ int sh2_irl_irq(SH2 *sh2, int level, int nested_call) // do this to avoid missing irqs that other SH2 might clear int vector = sh2->irq_callback(sh2, level); sh2_do_irq(sh2, level, vector); - sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, 13); + sh2->m68krcycles_done += C_SH2_TO_M68K(sh2, 13); } else sh2->test_irq = 1; @@ -115,6 +115,7 @@ void sh2_pack(const SH2 *sh2, unsigned char *buff) p[0] = sh2->pending_int_irq; p[1] = sh2->pending_int_vector; + p[2] = sh2->m68krcycles_done; } void sh2_unpack(SH2 *sh2, const unsigned char *buff) @@ -127,6 +128,7 @@ void sh2_unpack(SH2 *sh2, const unsigned char *buff) sh2->pending_int_irq = p[0]; sh2->pending_int_vector = p[1]; sh2->test_irq = 1; + sh2->m68krcycles_done = p[2]; } #ifdef DRC_CMP @@ -237,7 +239,7 @@ static void dump_regs(SH2 *sh2) printf("%csh2 SR: %03x PR: %08x\n", csh2, sh2->sr, sh2->pr); } -void do_sh2_cmp(SH2 *current) +void REGPARM(1) do_sh2_cmp(SH2 *current) { static int current_slave; static u32 current_val; @@ -251,6 +253,13 @@ void do_sh2_cmp(SH2 *current) int cycles; int i, ret; +#if 0 + sr = current->sr; + current->sr &= 0x3f3; + do_sh2_trace(current, (signed int)sr >> 12); + current->sr = sr; + return; +#endif sh2ref[1].is_slave = 1; while (1) { diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 1394f94a..95c5658f 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -1,52 +1,69 @@ #ifndef __SH2_H__ #define __SH2_H__ -#if !defined(REGPARM) && defined(__i386__) -#define REGPARM(x) 
__attribute__((regparm(x))) -#else -#define REGPARM(x) -#endif +#include +#include // registers - matches structure order typedef enum { SHR_R0 = 0, SHR_SP = 15, SHR_PC, SHR_PPC, SHR_PR, SHR_SR, SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, + SH2_REGS, // register set size + SHR_T = 29, SHR_MEM = 30, SHR_TMP = 31, // drc specific pseudo regs } sh2_reg_e; +#define SHR_R(n) (SHR_R0+(n)) typedef struct SH2_ { - unsigned int r[16]; // 00 - unsigned int pc; // 40 - unsigned int ppc; - unsigned int pr; - unsigned int sr; - unsigned int gbr, vbr; // 50 - unsigned int mach, macl; // 58 + // registers. this MUST correlate with enum sh2_reg_e. + uint32_t r[16] ALIGNED(32); + uint32_t pc; // 40 + uint32_t ppc; + uint32_t pr; + uint32_t sr; + uint32_t gbr, vbr; // 50 + uint32_t mach, macl; // 58 // common - const void *read8_map; // 60 + const void *read8_map; const void *read16_map; + const void *read32_map; const void **write8_tab; const void **write16_tab; + const void **write32_tab; // drc stuff - int drc_tmp; // 70 + int drc_tmp; int irq_cycles; void *p_bios; // convenience pointers void *p_da; - void *p_sdram; // 80 + void *p_sdram; void *p_rom; + void *p_dram; + void *p_drcblk_da; + void *p_drcblk_ram; unsigned int pdb_io_csum[2]; #define SH2_STATE_RUN (1 << 0) // to prevent recursion -#define SH2_STATE_SLEEP (1 << 1) +#define SH2_STATE_SLEEP (1 << 1) // temporarily stopped (DMA, IO, ...) #define SH2_STATE_CPOLL (1 << 2) // polling comm regs #define SH2_STATE_VPOLL (1 << 3) // polling VDP +#define SH2_STATE_RPOLL (1 << 4) // polling address in SDRAM +#define SH2_TIMER_RUN (1 << 6) // SOC WDT timer is running +#define SH2_IN_DRC (1 << 7) // DRC in use unsigned int state; - unsigned int poll_addr; - int poll_cycles; + uint32_t poll_addr; + unsigned int poll_cycles; int poll_cnt; +// NB MUST be a bit unused in SH2 SR, see also cpu/sh2/compiler.c! +#define SH2_NO_POLLING (1 << 10) // poll detection control + int no_polling; + + // DRC branch cache. 
size must be 2^n and <=128 + int rts_cache_idx; + struct { uint32_t pc; void *code; } rts_cache[16]; + struct { uint32_t pc; void *code; } branch_cache[128]; // interpreter stuff int icount; // cycles left in current timeslice @@ -64,21 +81,22 @@ typedef struct SH2_ unsigned int cycles_timeslice; struct SH2_ *other_sh2; + int (*run)(struct SH2_ *, int); // we use 68k reference cycles for easier sync unsigned int m68krcycles_done; unsigned int mult_m68k_to_sh2; unsigned int mult_sh2_to_m68k; - unsigned char data_array[0x1000]; // cache (can be used as RAM) - unsigned int peri_regs[0x200/4]; // periphereal regs + uint8_t data_array[0x1000]; // cache (can be used as RAM) + uint32_t peri_regs[0x200/4]; // peripheral regs } SH2; #define CYCLE_MULT_SHIFT 10 #define C_M68K_TO_SH2(xsh2, c) \ - ((int)((c) * (xsh2).mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT) + (int)(((uint64_t)(c) * (xsh2)->mult_m68k_to_sh2) >> CYCLE_MULT_SHIFT) #define C_SH2_TO_M68K(xsh2, c) \ - ((int)((c + 3) * (xsh2).mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT) + (int)(((uint64_t)(c+3U) * (xsh2)->mult_sh2_to_m68k) >> CYCLE_MULT_SHIFT) int sh2_init(SH2 *sh2, int is_slave, SH2 *other_sh2); void sh2_finish(SH2 *sh2); @@ -92,17 +110,21 @@ void sh2_unpack(SH2 *sh2, const unsigned char *buff); int sh2_execute_drc(SH2 *sh2c, int cycles); int sh2_execute_interpreter(SH2 *sh2c, int cycles); -static inline int sh2_execute(SH2 *sh2, int cycles, int use_drc) +static __inline void sh2_execute_prepare(SH2 *sh2, int use_drc) +{ +#ifdef DRC_SH2 + sh2->run = use_drc ? 
sh2_execute_drc : sh2_execute_interpreter; +#else + sh2->run = sh2_execute_interpreter; +#endif +} + +static __inline int sh2_execute(SH2 *sh2, int cycles) { int ret; sh2->cycles_timeslice = cycles; -#ifdef DRC_SH2 - if (use_drc) - ret = sh2_execute_drc(sh2, cycles); - else -#endif - ret = sh2_execute_interpreter(sh2, cycles); + ret = sh2->run(sh2, cycles); return sh2->cycles_timeslice - ret; } @@ -112,17 +134,17 @@ static inline int sh2_execute(SH2 *sh2, int cycles, int use_drc) // pico memhandlers // XXX: move somewhere else -unsigned int REGPARM(2) p32x_sh2_read8(unsigned int a, SH2 *sh2); -unsigned int REGPARM(2) p32x_sh2_read16(unsigned int a, SH2 *sh2); -unsigned int REGPARM(2) p32x_sh2_read32(unsigned int a, SH2 *sh2); -void REGPARM(3) p32x_sh2_write8 (unsigned int a, unsigned int d, SH2 *sh2); -void REGPARM(3) p32x_sh2_write16(unsigned int a, unsigned int d, SH2 *sh2); -void REGPARM(3) p32x_sh2_write32(unsigned int a, unsigned int d, SH2 *sh2); +u32 REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2); +u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2); +u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2); +void REGPARM(3) p32x_sh2_write8 (u32 a, u32 d, SH2 *sh2); +void REGPARM(3) p32x_sh2_write16(u32 a, u32 d, SH2 *sh2); +void REGPARM(3) p32x_sh2_write32(u32 a, u32 d, SH2 *sh2); // debug #ifdef DRC_CMP void do_sh2_trace(SH2 *current, int cycles); -void do_sh2_cmp(SH2 *current); +void REGPARM(1) do_sh2_cmp(SH2 *current); #endif #endif /* __SH2_H__ */ diff --git a/jni/Android.mk b/jni/Android.mk index 72a81634..43680aa7 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -1,80 +1,132 @@ LOCAL_PATH := $(call my-dir) -include $(CLEAR_VARS) +CORE_DIR := $(LOCAL_PATH)/.. 
+COMMON_DIR := $(CORE_DIR)/platform/common +LIBRETRO_DIR := $(CORE_DIR)/platform/libretro +LIBRETRO_COMM_DIR := $(LIBRETRO_DIR)/libretro-common +UNZIP_DIR := $(CORE_DIR)/unzip +R := $(CORE_DIR)/ +FR := $(R) -ifeq ($(NEON_BUILD)$(TARGET_ARCH_ABI),1armeabi-v7a) - LOCAL_MODULE := retro_picodrive-neon -else - LOCAL_MODULE := retro_picodrive -endif +SRCS_COMMON := +DEFINES := +ARCH := $(TARGET_ARCH) -R := ../ -FR := $(LOCAL_PATH)/$(R) +use_cyclone := 0 +use_fame := 1 +use_musashi := 0 +use_drz80 := 0 +use_cz80 := 1 +use_sh2drc := 0 +use_svpdrc := 0 -use_cyclone = 0 -use_fame = 0 -use_musashi = 0 -use_drz80 = 0 -use_cz80 = 0 -use_sh2drc = 0 -use_sh2mame = 0 -use_svpdrc = 0 - -asm_memory = 0 -asm_render = 0 -asm_ym2612 = 0 -asm_misc = 0 -asm_cdpico = 0 -asm_cdmemory = 0 -asm_mix = 0 +asm_memory := 0 +asm_render := 0 +asm_ym2612 := 0 +asm_misc := 0 +asm_cdmemory := 0 +asm_mix := 0 +asm_32xdraw := 0 +asm_32xmemory := 0 ifeq ($(TARGET_ARCH),arm) - LOCAL_ARM_MODE := arm - ifeq ($(NEON_BUILD),1) - LOCAL_ARM_NEON := true - endif +# use_cyclone := 1 +# use_fame := 0 +# use_drz80 := 1 +# use_cz80 := 0 + use_sh2drc := 1 +# use_svpdrc := 1 - use_cyclone = 1 - use_drz80 = 1 - use_sh2drc = 1 - use_svpdrc = 1 - - asm_memory = 1 - asm_render = 1 - asm_ym2612 = 1 - asm_misc = 1 - asm_cdpico = 1 - asm_cdmemory = 1 - asm_mix = 1 -else - use_fame = 1 - use_cz80 = 1 - use_sh2mame = 1 +# asm_memory := 1 +# asm_render := 1 +# asm_ym2612 := 1 +# asm_misc := 1 +# asm_cdmemory := 1 +# asm_mix := 1 +# asm_32xdraw := 1 +# asm_32xmemory := 1 endif -# sources -SRCS_COMMON := -DEFINES := -ARCH := $(TARGET_ARCH) -include $(R)platform/common/common.mak +ifeq ($(TARGET_ARCH_ABI),armeabi) + CYCLONE_CONFIG := cyclone_config_armv4.h +endif -LOCAL_SRC_FILES += $(SRCS_COMMON) -LOCAL_SRC_FILES += $(R)platform/libretro.c -LOCAL_SRC_FILES += $(R)platform/common/mp3.c -LOCAL_SRC_FILES += $(R)platform/common/mp3_dummy.c +include $(COMMON_DIR)/common.mak -# zlib/unzip -LOCAL_SRC_FILES += $(R)zlib/gzio.c 
$(R)zlib/inffast.c $(R)zlib/inflate.c \ - $(R)zlib/inftrees.c $(R)zlib/trees.c $(R)zlib/deflate.c \ - $(R)zlib/crc32.c $(R)zlib/adler32.c $(R)zlib/zutil.c \ - $(R)zlib/compress.c $(R)zlib/uncompr.c +LCHDR = $(CORE_DIR)/pico/cd/libchdr +LCHDR_LZMA = $(LCHDR)/deps/lzma-24.05 +LCHDR_ZSTD = $(LCHDR)/deps/zstd-1.5.6/lib -LOCAL_SRC_FILES += $(R)unzip/unzip.c $(R)unzip/unzip_stream.c +SOURCES_C := $(LIBRETRO_DIR)/libretro.c \ + $(LIBRETRO_COMM_DIR)/formats/png/rpng.c \ + $(LIBRETRO_COMM_DIR)/streams/trans_stream.c \ + $(LIBRETRO_COMM_DIR)/streams/trans_stream_pipe.c \ + $(LIBRETRO_COMM_DIR)/streams/trans_stream_zlib.c \ + $(LIBRETRO_COMM_DIR)/file/file_path_io.c \ + $(LIBRETRO_COMM_DIR)/file/file_path.c \ + $(LIBRETRO_COMM_DIR)/vfs/vfs_implementation.c \ + $(LIBRETRO_COMM_DIR)/time/rtime.c \ + $(LIBRETRO_COMM_DIR)/string/stdstring.c \ + $(LIBRETRO_COMM_DIR)/encodings/encoding_utf.c \ + $(LIBRETRO_COMM_DIR)/compat/compat_strcasestr.c \ + $(LIBRETRO_COMM_DIR)/compat/compat_strl.c \ + $(COMMON_DIR)/mp3.c \ + $(COMMON_DIR)/mp3_sync.c \ + $(COMMON_DIR)/mp3_dummy.c \ + $(UNZIP_DIR)/unzip.c \ + $(LCHDR)/src/libchdr_bitstream.c \ + $(LCHDR)/src/libchdr_cdrom.c \ + $(LCHDR)/src/libchdr_chd.c \ + $(LCHDR)/src/libchdr_flac.c \ + $(LCHDR)/src/libchdr_huffman.c \ + $(LCHDR_LZMA)/src/Alloc.c \ + $(LCHDR_LZMA)/src/CpuArch.c \ + $(LCHDR_LZMA)/src/Delta.c \ + $(LCHDR_LZMA)/src/LzFind.c \ + $(LCHDR_LZMA)/src/LzmaDec.c \ + $(LCHDR_LZMA)/src/LzmaEnc.c \ + $(LCHDR_LZMA)/src/Sort.c \ + $(LCHDR_ZSTD)/common/entropy_common.c \ + $(LCHDR_ZSTD)/common/error_private.c \ + $(LCHDR_ZSTD)/common/fse_decompress.c \ + $(LCHDR_ZSTD)/common/xxhash.c \ + $(LCHDR_ZSTD)/common/zstd_common.c \ + $(LCHDR_ZSTD)/decompress/huf_decompress.c \ + $(LCHDR_ZSTD)/decompress/zstd_ddict.c \ + $(LCHDR_ZSTD)/decompress/zstd_decompress_block.c \ + $(LCHDR_ZSTD)/decompress/zstd_decompress.c -LOCAL_C_INCLUDES += $(R) +COREFLAGS := $(addprefix -D,$(DEFINES)) -fno-strict-aliasing -DUSE_LIBCHDR=1 -DZ7_ST -DZSTD_DISABLE_ASM 
+COREFLAGS += -I$(LCHDR)/include -I$(LCHDR_LZMA)/include -I$(LCHDR_ZSTD) +ifeq (,$(call gte,$(APP_PLATFORM_LEVEL),18)) +ifneq ($(TARGET_ARCH_ABI),arm64-v8a) +# HACK +COREFLAGS += -Dgetauxval=0* +endif +endif -LOCAL_CFLAGS += $(addprefix -D,$(DEFINES)) -LOCAL_CFLAGS += -Wall -O3 -ffast-math -DNDEBUG -LOCAL_LDLIBS := -llog +GIT_REVISION := $(shell git rev-parse --short HEAD || echo unknown) +COREFLAGS += -DREVISION=\"$(GIT_REVISION)\" + +ifneq ($(filter armeabi%, $(TARGET_ARCH_ABI)),) +$(CORE_DIR)/pico/pico_int_offs.h: + cp $(CORE_DIR)/tools/offsets/generic-ilp32-offsets.h $@ +.PHONY: $(CORE_DIR)/pico/pico_int_offs.h + +$(filter %.S,$(SRCS_COMMON)): $(CORE_DIR)/pico/pico_int_offs.h +endif + +include $(CLEAR_VARS) +LOCAL_MODULE := retro +LOCAL_SRC_FILES := $(SRCS_COMMON) $(SOURCES_C) +LOCAL_CFLAGS := $(COREFLAGS) +LOCAL_C_INCLUDES := $(CORE_DIR) $(LIBRETRO_COMM_DIR)/include +LOCAL_LDFLAGS := -Wl,-version-script=$(LIBRETRO_DIR)/link.T +LOCAL_LDLIBS := -llog -lz +LOCAL_ARM_MODE := arm + +ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) + LOCAL_ARM_NEON := true +endif include $(BUILD_SHARED_LIBRARY) diff --git a/pico/32x/32x.c b/pico/32x/32x.c index b8a84d93..de0193c3 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -1,18 +1,19 @@ /* * PicoDrive * (C) notaz, 2009,2010,2013 + * (C) irixxxx, 2019-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
*/ #include "../pico_int.h" #include "../sound/ym2612.h" -#include "../../cpu/sh2/compiler.h" +#include struct Pico32x Pico32x; SH2 sh2s[2]; -#define SH2_IDLE_STATES (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_SLEEP) +#define SH2_IDLE_STATES (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL|SH2_STATE_SLEEP) static int REGPARM(2) sh2_irq_cb(SH2 *sh2, int level) { @@ -30,38 +31,43 @@ static int REGPARM(2) sh2_irq_cb(SH2 *sh2, int level) } // MUST specify active_sh2 when called from sh2 memhandlers -void p32x_update_irls(SH2 *active_sh2, int m68k_cycles) +void p32x_update_irls(SH2 *active_sh2, unsigned int m68k_cycles) { int irqs, mlvl = 0, slvl = 0; int mrun, srun; + if ((Pico32x.regs[0] & (P32XS_nRES|P32XS_ADEN)) != (P32XS_nRES|P32XS_ADEN)) + return; + if (active_sh2 != NULL) m68k_cycles = sh2_cycles_done_m68k(active_sh2); + // find top bit = highest irq number (0 <= irl <= 14/2) by binary search + // msh2 - irqs = Pico32x.sh2irqs | Pico32x.sh2irqi[0]; - while ((irqs >>= 1)) - mlvl++; - mlvl *= 2; + irqs = Pico32x.sh2irqi[0]; + if (irqs >= 0x10) mlvl += 8, irqs >>= 4; + if (irqs >= 0x04) mlvl += 4, irqs >>= 2; + if (irqs >= 0x02) mlvl += 2, irqs >>= 1; // ssh2 - irqs = Pico32x.sh2irqs | Pico32x.sh2irqi[1]; - while ((irqs >>= 1)) - slvl++; - slvl *= 2; + irqs = Pico32x.sh2irqi[1]; + if (irqs >= 0x10) slvl += 8, irqs >>= 4; + if (irqs >= 0x04) slvl += 4, irqs >>= 2; + if (irqs >= 0x02) slvl += 2, irqs >>= 1; - mrun = sh2_irl_irq(&msh2, mlvl, active_sh2 == &msh2); + mrun = sh2_irl_irq(&msh2, mlvl, msh2.state & SH2_STATE_RUN); if (mrun) { - p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, m68k_cycles); - if (active_sh2 == &msh2) - sh2_end_run(active_sh2, 1); + p32x_sh2_poll_event(msh2.poll_addr, &msh2, SH2_IDLE_STATES & ~SH2_STATE_SLEEP, m68k_cycles); + if (msh2.state & SH2_STATE_RUN) + sh2_end_run(&msh2, 0); } - srun = sh2_irl_irq(&ssh2, slvl, active_sh2 == &ssh2); + srun = sh2_irl_irq(&ssh2, slvl, ssh2.state & SH2_STATE_RUN); if (srun) { - p32x_sh2_poll_event(&ssh2, 
SH2_IDLE_STATES, m68k_cycles); - if (active_sh2 == &ssh2) - sh2_end_run(active_sh2, 1); + p32x_sh2_poll_event(ssh2.poll_addr, &ssh2, SH2_IDLE_STATES & ~SH2_STATE_SLEEP, m68k_cycles); + if (ssh2.state & SH2_STATE_RUN) + sh2_end_run(&ssh2, 0); } elprintf(EL_32X, "update_irls: m %d/%d, s %d/%d", mlvl, mrun, slvl, srun); @@ -70,16 +76,17 @@ void p32x_update_irls(SH2 *active_sh2, int m68k_cycles) // the mask register is inconsistent, CMD is supposed to be a mask, // while others are actually irq trigger enables? // TODO: test on hw.. -void p32x_trigger_irq(SH2 *sh2, int m68k_cycles, unsigned int mask) +void p32x_trigger_irq(SH2 *sh2, unsigned int m68k_cycles, unsigned int mask) { - Pico32x.sh2irqs |= mask & P32XI_VRES; + Pico32x.sh2irqi[0] |= mask & P32XI_VRES; + Pico32x.sh2irqi[1] |= mask & P32XI_VRES; Pico32x.sh2irqi[0] |= mask & (Pico32x.sh2irq_mask[0] << 3); Pico32x.sh2irqi[1] |= mask & (Pico32x.sh2irq_mask[1] << 3); p32x_update_irls(sh2, m68k_cycles); } -void p32x_update_cmd_irq(SH2 *sh2, int m68k_cycles) +void p32x_update_cmd_irq(SH2 *sh2, unsigned int m68k_cycles) { if ((Pico32x.sh2irq_mask[0] & 2) && (Pico32x.regs[2 / 2] & 1)) Pico32x.sh2irqi[0] |= P32XI_CMD; @@ -98,13 +105,21 @@ void Pico32xStartup(void) { elprintf(EL_STATUS|EL_32X, "32X startup"); + PicoIn.AHW |= PAHW_32X; // TODO: OOM handling - PicoAHW |= PAHW_32X; - sh2_init(&msh2, 0, &ssh2); - msh2.irq_callback = sh2_irq_cb; - sh2_init(&ssh2, 1, &msh2); - ssh2.irq_callback = sh2_irq_cb; + if (Pico32xMem == NULL) { + Pico32xMem = plat_mmap(0x06000000, sizeof(*Pico32xMem), 0, 0); + if (Pico32xMem == NULL) { + elprintf(EL_STATUS, "OOM"); + return; + } + memset(Pico32xMem, 0, sizeof(struct Pico32xMem)); + sh2_init(&msh2, 0, &ssh2); + msh2.irq_callback = sh2_irq_cb; + sh2_init(&ssh2, 1, &msh2); + ssh2.irq_callback = sh2_irq_cb; + } PicoMemSetup32x(); p32x_pwm_ctl_changed(); p32x_timers_recalc(); @@ -115,13 +130,29 @@ void Pico32xStartup(void) if (!Pico.m.pal) Pico32x.vdp_regs[0] |= P32XV_nPAL; + else + 
Pico32x.vdp_regs[0] &= ~P32XV_nPAL; rendstatus_old = -1; + Pico32xPrepare(); + emu_32x_startup(); +} + +void Pico32xShutdown(void) +{ + Pico32x.sh2_regs[0] &= ~P32XS2_ADEN; + + rendstatus_old = -1; + + PicoIn.AHW &= ~PAHW_32X; + if (PicoIn.AHW & PAHW_MCD) + PicoMemSetupCD(); + else + PicoMemSetup(); emu_32x_startup(); } -#define HWSWAP(x) (((x) << 16) | ((x) >> 16)) void p32x_reset_sh2s(void) { elprintf(EL_32X, "sh2 reset"); @@ -134,28 +165,30 @@ void p32x_reset_sh2s(void) // if we don't have BIOS set, perform it's work here. // MSH2 if (p32x_bios_m == NULL) { - unsigned int idl_src, idl_dst, idl_size; // initial data load - unsigned int vbr; - - // initial data - idl_src = HWSWAP(*(unsigned int *)(Pico.rom + 0x3d4)) & ~0xf0000000; - idl_dst = HWSWAP(*(unsigned int *)(Pico.rom + 0x3d8)) & ~0xf0000000; - idl_size= HWSWAP(*(unsigned int *)(Pico.rom + 0x3dc)); - if (idl_size > Pico.romsize || idl_src + idl_size > Pico.romsize || - idl_size > 0x40000 || idl_dst + idl_size > 0x40000 || (idl_src & 3) || (idl_dst & 3)) { - elprintf(EL_STATUS|EL_ANOMALY, "32x: invalid initial data ptrs: %06x -> %06x, %06x", - idl_src, idl_dst, idl_size); - } - else - memcpy(Pico32xMem->sdram + idl_dst, Pico.rom + idl_src, idl_size); - - // GBR/VBR - vbr = HWSWAP(*(unsigned int *)(Pico.rom + 0x3e8)); sh2_set_gbr(0, 0x20004000); - sh2_set_vbr(0, vbr); - // checksum and M_OK - Pico32x.regs[0x28 / 2] = *(unsigned short *)(Pico.rom + 0x18e); + if (!Pico.m.ncart_in) { // copy IDL from cartridge + unsigned int idl_src, idl_dst, idl_size; // initial data load + unsigned int vbr; + // initial data + idl_src = CPU_BE2(*(u32 *)(Pico.rom + 0x3d4)) & ~0xf0000000; + idl_dst = CPU_BE2(*(u32 *)(Pico.rom + 0x3d8)) & ~0xf0000000; + idl_size= CPU_BE2(*(u32 *)(Pico.rom + 0x3dc)); + // copy in guest memory space + idl_src += 0x2000000; + idl_dst += 0x6000000; + while (idl_size >= 4) { + p32x_sh2_write32(idl_dst, p32x_sh2_read32(idl_src, &msh2), &msh2); + idl_src += 4, idl_dst += 4, idl_size -= 4; + } + + // 
VBR + vbr = CPU_BE2(*(u32 *)(Pico.rom + 0x3e8)); + sh2_set_vbr(0, vbr); + + // checksum and M_OK + Pico32x.regs[0x28 / 2] = *(u16 *)(Pico.rom + 0x18e); + } // program will set M_OK } @@ -164,7 +197,7 @@ void p32x_reset_sh2s(void) unsigned int vbr; // GBR/VBR - vbr = HWSWAP(*(unsigned int *)(Pico.rom + 0x3ec)); + vbr = CPU_BE2(*(u32 *)(Pico.rom + 0x3ec)); sh2_set_gbr(1, 0x20004000); sh2_set_vbr(1, vbr); // program will set S_OK @@ -175,10 +208,6 @@ void p32x_reset_sh2s(void) void Pico32xInit(void) { - if (msh2.mult_m68k_to_sh2 == 0 || msh2.mult_sh2_to_m68k == 0) - Pico32xSetClocks(PICO_MSH2_HZ, 0); - if (ssh2.mult_m68k_to_sh2 == 0 || ssh2.mult_sh2_to_m68k == 0) - Pico32xSetClocks(0, PICO_MSH2_HZ); } void PicoPower32x(void) @@ -186,88 +215,106 @@ void PicoPower32x(void) memset(&Pico32x, 0, sizeof(Pico32x)); Pico32x.regs[0] = P32XS_REN|P32XS_nRES; // verified + Pico32x.regs[0x10/2] = 0xffff; Pico32x.vdp_regs[0x0a/2] = P32XV_VBLK|P32XV_PEN; } void PicoUnload32x(void) { - if (Pico32xMem != NULL) - plat_munmap(Pico32xMem, sizeof(*Pico32xMem)); - Pico32xMem = NULL; + if (PicoIn.AHW & PAHW_32X) + Pico32xShutdown(); + sh2_finish(&msh2); sh2_finish(&ssh2); - PicoAHW &= ~PAHW_32X; + if (Pico32xMem != NULL) + plat_munmap(Pico32xMem, sizeof(*Pico32xMem)); + Pico32xMem = NULL; } void PicoReset32x(void) { - if (PicoAHW & PAHW_32X) { + if (PicoIn.AHW & PAHW_32X) { p32x_trigger_irq(NULL, SekCyclesDone(), P32XI_VRES); - p32x_sh2_poll_event(&msh2, SH2_IDLE_STATES, 0); - p32x_sh2_poll_event(&ssh2, SH2_IDLE_STATES, 0); + p32x_sh2_poll_event(msh2.poll_addr, &msh2, SH2_IDLE_STATES, SekCyclesDone()); + p32x_sh2_poll_event(ssh2.poll_addr, &ssh2, SH2_IDLE_STATES, SekCyclesDone()); p32x_pwm_ctl_changed(); p32x_timers_recalc(); } } -static void p32x_start_blank(void) +static void p32x_render_frame(void) { - if (Pico32xDrawMode != PDM32X_OFF && !PicoSkipFrame) { + if (Pico32xDrawMode != PDM32X_OFF && !PicoIn.skipFrame) { int offs, lines; pprof_start(draw); offs = 8; lines = 224; - if 
((Pico.video.reg[1] & 8) && !(PicoOpt & POPT_ALT_RENDERER)) { + if (Pico.video.reg[1] & 8) { offs = 0; lines = 240; } - // XXX: no proper handling of 32col mode.. if ((Pico32x.vdp_regs[0] & P32XV_Mx) != 0 && // 32x not blanking - (Pico.video.reg[12] & 1) && // 40col mode - (PicoDrawMask & PDRAW_32X_ON)) + (!(Pico.video.debug_p & PVD_KILL_32X))) { int md_bg = Pico.video.reg[7] & 0x3f; // we draw full layer (not line-by-line) PicoDraw32xLayer(offs, lines, md_bg); } - else if (Pico32xDrawMode != PDM32X_32X_ONLY) + else if (Pico32xDrawMode == PDM32X_BOTH) PicoDraw32xLayerMdOnly(offs, lines); pprof_end(draw); } +} +static void p32x_start_blank(void) +{ // enter vblank Pico32x.vdp_regs[0x0a/2] |= P32XV_VBLK|P32XV_PEN; // FB swap waits until vblank if ((Pico32x.vdp_regs[0x0a/2] ^ Pico32x.pending_fb) & P32XV_FS) { - Pico32x.vdp_regs[0x0a/2] &= ~P32XV_FS; - Pico32x.vdp_regs[0x0a/2] |= Pico32x.pending_fb; - Pico32xSwapDRAM(Pico32x.pending_fb ^ 1); + Pico32x.vdp_regs[0x0a/2] ^= P32XV_FS; + Pico32xSwapDRAM(Pico32x.pending_fb ^ P32XV_FS); } - p32x_trigger_irq(NULL, SekCyclesDone(), P32XI_VINT); - p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0); - p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0); + p32x_trigger_irq(NULL, Pico.t.m68c_aim, P32XI_VINT); + p32x_sh2_poll_event(msh2.poll_addr, &msh2, SH2_STATE_VPOLL, Pico.t.m68c_aim); + p32x_sh2_poll_event(ssh2.poll_addr, &ssh2, SH2_STATE_VPOLL, Pico.t.m68c_aim); } -void p32x_schedule_hint(SH2 *sh2, int m68k_cycles) +static void p32x_end_blank(void) +{ + // end vblank + Pico32x.vdp_regs[0x0a/2] &= ~P32XV_VBLK; // get out of vblank + if ((Pico32x.vdp_regs[0] & P32XV_Mx) != 0) // no forced blanking + Pico32x.vdp_regs[0x0a/2] &= ~P32XV_PEN; // no palette access + if (!(Pico32x.sh2_regs[0] & 0x80)) { + // NB must precede VInt per hw manual, min 4 SH-2 cycles to pass Mars Check + Pico32x.hint_counter = (int)(-1.5*0x10); + p32x_schedule_hint(NULL, Pico.t.m68c_aim); + } + + p32x_sh2_poll_event(msh2.poll_addr, &msh2, SH2_STATE_VPOLL, 
Pico.t.m68c_aim); + p32x_sh2_poll_event(ssh2.poll_addr, &ssh2, SH2_STATE_VPOLL, Pico.t.m68c_aim); +} + +void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles) { // rather rough, 32x hint is useless in practice int after; - if (!((Pico32x.sh2irq_mask[0] | Pico32x.sh2irq_mask[1]) & 4)) return; // nobody cares - // note: when Pico.m.scanline is 224, SH2s might - // still be at scanline 93 (or so) - if (!(Pico32x.sh2_regs[0] & 0x80) && Pico.m.scanline > 224) + if (!(Pico32x.sh2_regs[0] & 0x80) && (Pico.video.status & PVS_VB2)) return; - after = (Pico32x.sh2_regs[4 / 2] + 1) * 488; + Pico32x.hint_counter += (Pico32x.sh2_regs[4 / 2] + 1) * (int)(488.5*0x10); + after = Pico32x.hint_counter >> 4; + Pico32x.hint_counter &= 0xf; if (sh2 != NULL) p32x_event_schedule_sh2(sh2, P32X_EVENT_HINT, after); else @@ -278,8 +325,8 @@ void p32x_schedule_hint(SH2 *sh2, int m68k_cycles) static void fillend_event(unsigned int now) { Pico32x.vdp_regs[0x0a/2] &= ~P32XV_nFEN; - p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, now); - p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, now); + p32x_sh2_poll_event(msh2.poll_addr, &msh2, SH2_STATE_VPOLL, now); + p32x_sh2_poll_event(ssh2.poll_addr, &ssh2, SH2_STATE_VPOLL, now); } static void hint_event(unsigned int now) @@ -294,9 +341,9 @@ typedef void (event_cb)(unsigned int now); unsigned int p32x_event_times[P32X_EVENT_COUNT]; static unsigned int event_time_next; static event_cb *p32x_event_cbs[P32X_EVENT_COUNT] = { - [P32X_EVENT_PWM] = p32x_pwm_irq_event, - [P32X_EVENT_FILLEND] = fillend_event, - [P32X_EVENT_HINT] = hint_event, + p32x_pwm_irq_event, // P32X_EVENT_PWM + fillend_event, // P32X_EVENT_FILLEND + hint_event, // P32X_EVENT_HINT }; // schedule event at some time 'after', in m68k clocks @@ -320,8 +367,12 @@ void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after) p32x_event_schedule(now, event, after); - left_to_next = (event_time_next - now) * 3; - sh2_end_run(sh2, left_to_next); + left_to_next = C_M68K_TO_SH2(sh2, 
(int)(event_time_next - now)); + if (sh2_cycles_left(sh2) > left_to_next) { + if (left_to_next < 1) + left_to_next = 0; + sh2_end_run(sh2, left_to_next); + } } static void p32x_run_events(unsigned int until) @@ -363,19 +414,19 @@ static void p32x_run_events(unsigned int until) oldest, event_time_next); } -static inline void run_sh2(SH2 *sh2, int m68k_cycles) +static void run_sh2(SH2 *sh2, unsigned int m68k_cycles) { - int cycles, done; + unsigned int cycles, done; pevt_log_sh2_o(sh2, EVT_RUN_START); sh2->state |= SH2_STATE_RUN; - cycles = C_M68K_TO_SH2(*sh2, m68k_cycles); + cycles = C_M68K_TO_SH2(sh2, m68k_cycles); elprintf_sh2(sh2, EL_32X, "+run %u %d @%08x", sh2->m68krcycles_done, cycles, sh2->pc); - done = sh2_execute(sh2, cycles, PicoOpt & POPT_EN_DRC); + done = sh2_execute(sh2, cycles); - sh2->m68krcycles_done += C_SH2_TO_M68K(*sh2, done); + sh2->m68krcycles_done += C_SH2_TO_M68K(sh2, done); sh2->state &= ~SH2_STATE_RUN; pevt_log_sh2_o(sh2, EVT_RUN_END); elprintf_sh2(sh2, EL_32X, "-run %u %d", @@ -409,28 +460,30 @@ void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target) // there might be new event to schedule current sh2 to if (event_time_next) { - left_to_event = event_time_next - m68k_target; - left_to_event *= 3; + left_to_event = C_M68K_TO_SH2(sh2, (int)(event_time_next - m68k_target)); if (sh2_cycles_left(sh2) > left_to_event) { if (left_to_event < 1) - left_to_event = 1; + left_to_event = 0; sh2_end_run(sh2, left_to_event); } } } +#define STEP_LS 24 +#define STEP_N 528 // at least one line (488) + #define sync_sh2s_normal p32x_sync_sh2s //#define sync_sh2s_lockstep p32x_sync_sh2s /* most timing is in 68k clock */ void sync_sh2s_normal(unsigned int m68k_target) { - unsigned int now, target, timer_cycles; + unsigned int now, target, next, timer_cycles; int cycles; elprintf(EL_32X, "sh2 sync to %u", m68k_target); - if (!(Pico32x.regs[0] & P32XS_nRES)) { + if ((Pico32x.regs[0] & (P32XS_nRES|P32XS_ADEN)) != (P32XS_nRES|P32XS_ADEN)) { 
msh2.m68krcycles_done = ssh2.m68krcycles_done = m68k_target; return; // rare } @@ -440,6 +493,7 @@ void sync_sh2s_normal(unsigned int m68k_target) now = ssh2.m68krcycles_done; timer_cycles = now; + pprof_start(m68k); while (CYCLES_GT(m68k_target, now)) { if (event_time_next && CYCLES_GE(now, event_time_next)) @@ -448,47 +502,68 @@ void sync_sh2s_normal(unsigned int m68k_target) target = m68k_target; if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; - while (CYCLES_GT(target, now)) { - elprintf(EL_32X, "sh2 exec to %u %d,%d/%d, flags %x", target, - target - msh2.m68krcycles_done, target - ssh2.m68krcycles_done, + next = target; + if (CYCLES_GT(target, now + STEP_N)) + next = now + STEP_N; + elprintf(EL_32X, "sh2 exec to %u %d,%d/%d, flags %x", next, + next - msh2.m68krcycles_done, next - ssh2.m68krcycles_done, m68k_target - now, Pico32x.emu_flags); + pprof_start(ssh2); if (!(ssh2.state & SH2_IDLE_STATES)) { - cycles = target - ssh2.m68krcycles_done; + cycles = next - ssh2.m68krcycles_done; if (cycles > 0) { - run_sh2(&ssh2, cycles); + run_sh2(&ssh2, cycles > 20U ? cycles : 20U); if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; + if (CYCLES_GT(next, target)) + next = target; } } + pprof_end(ssh2); + pprof_start(msh2); if (!(msh2.state & SH2_IDLE_STATES)) { - cycles = target - msh2.m68krcycles_done; + cycles = next - msh2.m68krcycles_done; if (cycles > 0) { - run_sh2(&msh2, cycles); + run_sh2(&msh2, cycles > 20U ? 
cycles : 20U); if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; + if (CYCLES_GT(next, target)) + next = target; } } + pprof_end(msh2); - now = target; - if (!(msh2.state & SH2_IDLE_STATES)) { - if (CYCLES_GT(now, msh2.m68krcycles_done)) + now = next; + if (CYCLES_GT(now, msh2.m68krcycles_done)) { + if (!(msh2.state & SH2_IDLE_STATES)) now = msh2.m68krcycles_done; } - if (!(ssh2.state & SH2_IDLE_STATES)) { - if (CYCLES_GT(now, ssh2.m68krcycles_done)) + if (CYCLES_GT(now, ssh2.m68krcycles_done)) { + if (!(ssh2.state & SH2_IDLE_STATES)) now = ssh2.m68krcycles_done; } + if (CYCLES_GT(now, timer_cycles+STEP_N)) { + if (msh2.state & SH2_TIMER_RUN) + p32x_timer_do(&msh2, now - timer_cycles); + if (ssh2.state & SH2_TIMER_RUN) + p32x_timer_do(&ssh2, now - timer_cycles); + timer_cycles = now; + } } - p32x_timers_do(now - timer_cycles); + if (msh2.state & SH2_TIMER_RUN) + p32x_timer_do(&msh2, now - timer_cycles); + if (ssh2.state & SH2_TIMER_RUN) + p32x_timer_do(&ssh2, now - timer_cycles); timer_cycles = now; } + pprof_end_sub(m68k); // advance idle CPUs if (msh2.state & SH2_IDLE_STATES) { @@ -499,32 +574,33 @@ void sync_sh2s_normal(unsigned int m68k_target) if (CYCLES_GT(m68k_target, ssh2.m68krcycles_done)) ssh2.m68krcycles_done = m68k_target; } -} -#define STEP_68K 24 + // everyone is in sync now + Pico32x.comm_dirty = 0; +} void sync_sh2s_lockstep(unsigned int m68k_target) { unsigned int mcycles; mcycles = msh2.m68krcycles_done; - if (ssh2.m68krcycles_done < mcycles) + if (CYCLES_GT(mcycles, ssh2.m68krcycles_done)) mcycles = ssh2.m68krcycles_done; - while (mcycles < m68k_target) { - mcycles += STEP_68K; + while (CYCLES_GT(m68k_target, mcycles)) { + mcycles += STEP_LS; sync_sh2s_normal(mcycles); } } #define CPUS_RUN(m68k_cycles) do { \ - if (PicoAHW & PAHW_MCD) \ + if (PicoIn.AHW & PAHW_MCD) \ pcd_run_cpus(m68k_cycles); \ else \ SekRunM68k(m68k_cycles); \ \ if ((Pico32x.emu_flags & P32XF_Z80_32X_IO) && Pico.m.z80Run \ - && 
!Pico.m.z80_reset && (PicoOpt & POPT_EN_Z80)) \ + && !Pico.m.z80_reset && (PicoIn.opt & POPT_EN_Z80)) \ PicoSyncZ80(SekCyclesDone()); \ if (Pico32x.emu_flags & (P32XF_68KCPOLL|P32XF_68KVPOLL)) \ p32x_sync_sh2s(SekCyclesDone()); \ @@ -536,20 +612,13 @@ void sync_sh2s_lockstep(unsigned int m68k_target) void PicoFrame32x(void) { - Pico.m.scanline = 0; - - Pico32x.vdp_regs[0x0a/2] &= ~P32XV_VBLK; // get out of vblank - if ((Pico32x.vdp_regs[0] & P32XV_Mx) != 0) // no forced blanking - Pico32x.vdp_regs[0x0a/2] &= ~P32XV_PEN; // no palette access - - if (!(Pico32x.sh2_regs[0] & 0x80)) - p32x_schedule_hint(NULL, SekCyclesDone()); - p32x_sh2_poll_event(&msh2, SH2_STATE_VPOLL, 0); - p32x_sh2_poll_event(&ssh2, SH2_STATE_VPOLL, 0); + if (PicoIn.AHW & PAHW_MCD) + pcd_prepare_frame(); PicoFrameStart(); + if (Pico32xDrawMode != PDM32X_BOTH) + Pico.est.rendstatus |= PDRAW_SYNC_NEEDED; PicoFrameHints(); - sh2_drc_frame(); elprintf(EL_32X, "poll: %02x %02x %02x", Pico32x.emu_flags & 3, msh2.state, ssh2.state); @@ -578,10 +647,24 @@ void Pico32xStateLoaded(int is_early) return; } - sh2s[0].m68krcycles_done = sh2s[1].m68krcycles_done = SekCyclesDone(); + if (CYCLES_GE(sh2s[0].m68krcycles_done - Pico.t.m68c_aim, 500) || + CYCLES_GE(sh2s[1].m68krcycles_done - Pico.t.m68c_aim, 500)) + sh2s[0].m68krcycles_done = sh2s[1].m68krcycles_done = SekCyclesDone(); p32x_update_irls(NULL, SekCyclesDone()); + p32x_timers_recalc(); p32x_pwm_state_loaded(); p32x_run_events(SekCyclesDone()); } +void Pico32xPrepare(void) +{ + if (msh2.mult_m68k_to_sh2 == 0 || msh2.mult_sh2_to_m68k == 0) + Pico32xSetClocks(PICO_MSH2_HZ, 0); + if (ssh2.mult_m68k_to_sh2 == 0 || ssh2.mult_sh2_to_m68k == 0) + Pico32xSetClocks(0, PICO_MSH2_HZ); + + sh2_execute_prepare(&msh2, PicoIn.opt & POPT_EN_DRC); + sh2_execute_prepare(&ssh2, PicoIn.opt & POPT_EN_DRC); +} + // vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 66f67a7e..a119e6bb 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -1,33 
+1,57 @@ /* * PicoDrive * (C) notaz, 2009,2010 + * (C) irixxxx, 2019-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. */ #include "../pico_int.h" +// NB: 32X officially doesn't support H32 mode. However, it does work since the +// cartridge slot carries the EDCLK signal which is always H40 clock and is used +// as video clock by the 32X. The H32 MD image is overlaid with the 320 px 32X +// image which has the same on-screen width. How the /YS signal on the cartridge +// slot (signalling the display of background color) is processed in this case +// is however unclear and might lead to glitches due to race conditions by the +// different video clocks for H32 and H40. + +// BGR555 to native conversion +#if defined(USE_BGR555) +#define PXCONV(t) ((t)&(mr|mg|mb|mp)) +#define PXPRIO 0x8000 // prio in MSB +#elif defined(USE_BGR565) +#define PXCONV(t) (((t)&mr) | (((t)&(mg|mb)) << 1) | (((t)&mp) >> 10)) +#define PXPRIO 0x0020 // prio in LS green bit +#else // RGB565 +#define PXCONV(t) ((((t)&mr) << 11) | (((t)&mg) << 1) | (((t)&(mp|mb)) >> 10)) +#define PXPRIO 0x0020 // prio in LS green bit +#endif + int (*PicoScan32xBegin)(unsigned int num); int (*PicoScan32xEnd)(unsigned int num); int Pico32xDrawMode; +void *DrawLineDestBase32x; +int DrawLineDestIncrement32x; + static void convert_pal555(int invert_prio) { - unsigned int *ps = (void *)Pico32xMem->pal; - unsigned int *pd = (void *)Pico32xMem->pal_native; - unsigned int m1 = 0x001f001f; - unsigned int m2 = 0x03e003e0; - unsigned int m3 = 0xfc00fc00; - unsigned int inv = 0; + u32 *ps = (void *)Pico32xMem->pal; + u32 *pd = (void *)Pico32xMem->pal_native; + u32 mr = 0x001f001f; // masks for red, green, blue, prio + u32 mg = 0x03e003e0; + u32 mb = 0x7c007c00; + u32 mp = 0x80008000; + u32 inv = 0; int i; if (invert_prio) - inv = 0x00200020; + inv = 0x80008000; - // place prio to LS green bit for (i = 0x100/2; i > 0; i--, ps++, pd++) { - unsigned int t = *ps; - *pd = 
(((t & m1) << 11) | ((t & m2) << 1) | ((t & m3) >> 10)) ^ inv; + u32 t = *ps ^ inv; + *pd = PXCONV(t); } Pico32x.dirty_pal = 0; @@ -36,19 +60,25 @@ static void convert_pal555(int invert_prio) // direct color mode #define do_line_dc(pd, p32x, pmd, inv, pmd_draw_code) \ { \ - const unsigned int m1 = 0x001f; \ - const unsigned int m2 = 0x03e0; \ - const unsigned int m3 = 0x7c00; \ - int i; \ + const u16 mr = 0x001f; \ + const u16 mg = 0x03e0; \ + const u16 mb = 0x7c00; \ + const u16 mp = 0x0000; \ + unsigned short t; \ + int i = 320; \ \ - for (i = 320; i > 0; i--, pd++, p32x++, pmd++) { \ - unsigned short t = *p32x; \ - if ((*pmd & 0x3f) != mdbg && !((t ^ inv) & 0x8000)) { \ - pmd_draw_code; \ - continue; \ + while (i > 0) { \ + for (; i > 0 && (*pmd & 0x3f) == mdbg; pd++, pmd++, i--) { \ + t = *p32x++; \ + *pd = PXCONV(t); \ + } \ + for (; i > 0 && (*pmd & 0x3f) != mdbg; pd++, pmd++, i--) { \ + t = *p32x++ ^ inv; \ + if (t & 0x8000) \ + *pd = PXCONV(t); \ + else \ + pmd_draw_code; \ } \ - \ - *pd = ((t & m1) << 11) | ((t & m2) << 1) | ((t & m3) >> 10); \ } \ } @@ -56,15 +86,21 @@ static void convert_pal555(int invert_prio) #define do_line_pp(pd, p32x, pmd, pmd_draw_code) \ { \ unsigned short t; \ - int i; \ - for (i = 320; i > 0; i--, pd++, p32x++, pmd++) { \ - t = pal[*(unsigned char *)((long)p32x ^ 1)]; \ - if ((t & 0x20) || (*pmd & 0x3f) == mdbg) \ + int i = 320; \ + while (i > 0) { \ + for (; i > 0 && (*pmd & 0x3f) == mdbg; pd++, pmd++, i--) { \ + t = pal[*(unsigned char *)(MEM_BE2((uintptr_t)(p32x++)))]; \ *pd = t; \ - else \ - pmd_draw_code; \ + } \ + for (; i > 0 && (*pmd & 0x3f) != mdbg; pd++, pmd++, i--) { \ + t = pal[*(unsigned char *)(MEM_BE2((uintptr_t)(p32x++)))]; \ + if (t & PXPRIO) \ + *pd = t; \ + else \ + pmd_draw_code; \ + } \ } \ -} +} // run length mode #define do_line_rl(pd, p32x, pmd, pmd_draw_code) \ @@ -74,7 +110,7 @@ static void convert_pal555(int invert_prio) for (i = 320; i > 0; p32x++) { \ t = pal[*p32x & 0xff]; \ for (len = (*p32x >> 8) 
+ 1; len > 0 && i > 0; len--, i--, pd++, pmd++) { \ - if ((*pmd & 0x3f) == mdbg || (t & 0x20)) \ + if ((*pmd & 0x3f) == mdbg || (t & PXPRIO)) \ *pd = t; \ else \ pmd_draw_code; \ @@ -83,20 +119,18 @@ static void convert_pal555(int invert_prio) } // this is almost never used (Wiz and menu bg gen only) -void FinalizeLine32xRGB555(int sh, int line) +void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est) { - unsigned short *pd = DrawLineDest; + unsigned short *pd = est->DrawLineDest; unsigned short *pal = Pico32xMem->pal_native; - unsigned char *pmd = HighCol + 8; + unsigned char *pmd = est->HighCol + 8; unsigned short *dram, *p32x; unsigned char mdbg; - FinalizeLine555(sh, line); + FinalizeLine555(sh, line, est); if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 0 || // 32x blanking - // XXX: how is 32col mode hadled by real hardware? - !(Pico.video.reg[12] & 1) || // 32col mode - !(PicoDrawMask & PDRAW_32X_ON)) + (Pico.video.debug_p & PVD_KILL_32X)) { return; } @@ -130,10 +164,11 @@ void FinalizeLine32xRGB555(int sh, int line) #define PICOSCAN_PRE \ PicoScan32xBegin(l + (lines_sft_offs & 0xff)); \ - dst = DrawLineDest; \ + dst = Pico.est.DrawLineDest; \ #define PICOSCAN_POST \ PicoScan32xEnd(l + (lines_sft_offs & 0xff)); \ + Pico.est.DrawLineDest = (char *)Pico.est.DrawLineDest + DrawLineDestIncrement32x; \ #define make_do_loop(name, pre_code, post_code, md_code) \ /* Direct Color Mode */ \ @@ -141,9 +176,9 @@ static void do_loop_dc##name(unsigned short *dst, \ unsigned short *dram, int lines_sft_offs, int mdbg) \ { \ int inv_bit = (Pico32x.vdp_regs[0] & P32XV_PRI) ? 
0x8000 : 0; \ - unsigned char *pmd = PicoDraw2FB + \ + unsigned char *pmd = Pico.est.Draw2FB + \ 328 * (lines_sft_offs & 0xff) + 8; \ - unsigned short *palmd = HighPal; \ + unsigned short *palmd = Pico.est.HighPal; \ unsigned short *p32x; \ int lines = lines_sft_offs >> 16; \ int l; \ @@ -153,6 +188,7 @@ static void do_loop_dc##name(unsigned short *dst, \ p32x = dram + dram[l]; \ do_line_dc(dst, p32x, pmd, inv_bit, md_code); \ post_code; \ + dst += DrawLineDestIncrement32x/2 - 320; \ } \ } \ \ @@ -161,9 +197,9 @@ static void do_loop_pp##name(unsigned short *dst, \ unsigned short *dram, int lines_sft_offs, int mdbg) \ { \ unsigned short *pal = Pico32xMem->pal_native; \ - unsigned char *pmd = PicoDraw2FB + \ + unsigned char *pmd = Pico.est.Draw2FB + \ 328 * (lines_sft_offs & 0xff) + 8; \ - unsigned short *palmd = HighPal; \ + unsigned short *palmd = Pico.est.HighPal; \ unsigned char *p32x; \ int lines = lines_sft_offs >> 16; \ int l; \ @@ -174,6 +210,7 @@ static void do_loop_pp##name(unsigned short *dst, \ p32x += (lines_sft_offs >> 8) & 1; \ do_line_pp(dst, p32x, pmd, md_code); \ post_code; \ + dst += DrawLineDestIncrement32x/2 - 320; \ } \ } \ \ @@ -182,9 +219,9 @@ static void do_loop_rl##name(unsigned short *dst, \ unsigned short *dram, int lines_sft_offs, int mdbg) \ { \ unsigned short *pal = Pico32xMem->pal_native; \ - unsigned char *pmd = PicoDraw2FB + \ + unsigned char *pmd = Pico.est.Draw2FB + \ 328 * (lines_sft_offs & 0xff) + 8; \ - unsigned short *palmd = HighPal; \ + unsigned short *palmd = Pico.est.HighPal; \ unsigned short *p32x; \ int lines = lines_sft_offs >> 16; \ int l; \ @@ -194,6 +231,7 @@ static void do_loop_rl##name(unsigned short *dst, \ p32x = dram + dram[l]; \ do_line_rl(dst, p32x, pmd, md_code); \ post_code; \ + dst += DrawLineDestIncrement32x/2 - 320; \ } \ } @@ -228,13 +266,12 @@ void PicoDraw32xLayer(int offs, int lines, int md_bg) int lines_sft_offs; int which_func; - DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; 
+ Pico.est.DrawLineDest = (char *)DrawLineDestBase32x + offs * DrawLineDestIncrement32x; + Pico.est.DrawLineDestIncr = DrawLineDestIncrement32x; dram = Pico32xMem->dram[Pico32x.vdp_regs[0x0a/2] & P32XV_FS]; - if (Pico32xDrawMode == PDM32X_BOTH) { - if (Pico.m.dirtyPal) - PicoDrawUpdateHighPal(); - } + if (Pico32xDrawMode == PDM32X_BOTH) + PicoDrawUpdateHighPal(); if ((Pico32x.vdp_regs[0] & P32XV_Mx) == 2) { @@ -266,33 +303,26 @@ do_it: if (Pico32x.vdp_regs[2 / 2] & P32XV_SFT) lines_sft_offs |= 1 << 8; - do_loop[which_func](DrawLineDest, dram, lines_sft_offs, md_bg); + do_loop[which_func](Pico.est.DrawLineDest, dram, lines_sft_offs, md_bg); } // mostly unused, games tend to keep 32X layer on void PicoDraw32xLayerMdOnly(int offs, int lines) { int have_scan = PicoScan32xBegin != NULL && PicoScan32xEnd != NULL; - unsigned short *dst = (void *)((char *)DrawLineDestBase + offs * DrawLineDestIncrement); - unsigned char *pmd = PicoDraw2FB + 328 * offs + 8; - unsigned short *pal = HighPal; + unsigned short *dst = (void *)((char *)DrawLineDestBase32x + offs * DrawLineDestIncrement32x); + unsigned char *pmd = Pico.est.Draw2FB + 328 * offs + 8; + unsigned short *pal = Pico.est.HighPal; int poffs = 0, plen = 320; int l, p; - if (!(Pico.video.reg[12] & 1)) { - // 32col mode - poffs = 32; - plen = 256; - } - - if (Pico.m.dirtyPal) - PicoDrawUpdateHighPal(); + PicoDrawUpdateHighPal(); dst += poffs; for (l = 0; l < lines; l++) { if (have_scan) { PicoScan32xBegin(l + offs); - dst = DrawLineDest + poffs; + dst = (unsigned short *)Pico.est.DrawLineDest + poffs; } for (p = 0; p < plen; p += 4) { dst[p + 0] = pal[*pmd++]; @@ -300,7 +330,7 @@ void PicoDraw32xLayerMdOnly(int offs, int lines) dst[p + 2] = pal[*pmd++]; dst[p + 3] = pal[*pmd++]; } - dst = (void *)((char *)dst + DrawLineDestIncrement); + dst = Pico.est.DrawLineDest = (char *)dst + DrawLineDestIncrement32x; pmd += 328 - plen; if (have_scan) PicoScan32xEnd(l + offs); @@ -309,21 +339,32 @@ void PicoDraw32xLayerMdOnly(int offs, 
int lines) void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode) { -#ifdef _ASM_32X_DRAW - extern void *Pico32xNativePal; - Pico32xNativePal = Pico32xMem->pal_native; -#endif - - if (which == PDF_RGB555 && use_32x_line_mode) { - // we'll draw via FinalizeLine32xRGB555 (rare) + if (which == PDF_RGB555) { + // CLUT pixels needed as well, for layer priority + PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328); + PicoDrawSetOutBufMD(NULL, 0); + } else { + // store CLUT pixels, same layout as alt renderer PicoDrawSetInternalBuf(NULL, 0); - Pico32xDrawMode = PDM32X_OFF; - return; + PicoDrawSetOutBufMD(Pico.est.Draw2FB, 328); } - // use the same layout as alt renderer - PicoDrawSetInternalBuf(PicoDraw2FB, 328); - Pico32xDrawMode = (which == PDF_RGB555) ? PDM32X_32X_ONLY : PDM32X_BOTH; + if (use_32x_line_mode) + // we'll draw via FinalizeLine32xRGB555 (rare) + Pico32xDrawMode = PDM32X_OFF; + else + // in RGB555 mode the 32x layer is drawn over the MD layer, in the other + // modes 32x and MD layer are merged together by the 32x renderer + Pico32xDrawMode = (which == PDF_RGB555) ? PDM32X_32X_ONLY : PDM32X_BOTH; +} + +void PicoDrawSetOutBuf32X(void *dest, int increment) +{ + DrawLineDestBase32x = dest; + DrawLineDestIncrement32x = increment; + // in RGB555 mode this buffer is also used by the MD renderer + if (Pico32xDrawMode != PDM32X_BOTH) + PicoDrawSetOutBufMD(DrawLineDestBase32x, DrawLineDestIncrement32x); } // vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/32x/draw_arm.S b/pico/32x/draw_arm.S new file mode 100644 index 00000000..f2b992fe --- /dev/null +++ b/pico/32x/draw_arm.S @@ -0,0 +1,515 @@ +@* +@* PicoDrive +@* (C) notaz, 2010 +@* (C) irixxxx, 2019-2023 +@* +@* This work is licensed under the terms of MAME license. +@* See COPYING file in the top-level directory. 
+@* + +#include +#include + +.extern Pico32x +.extern Pico + +.equiv P32XV_PRI, (1<< 7) + +.text +.align 2 + + PIC_LDR_INIT() + +.macro call_scan_prep cond pico @ &Pico +.if \cond + PIC_LDR(r4, r6, PicoScan32xBegin) + PIC_LDR(r5, r6, PicoScan32xEnd) + add r6, \pico, #OFS_Pico_est + ldr r4, [r4] + ldr r5, [r5] + stmfd sp!, {r4,r5,r6} +.endif +.endm + +.macro call_scan_fin_ge cond +.if \cond + addge sp, sp, #4*3 +.endif +.endm + +.macro call_scan_begin cond +.if \cond + stmfd sp!, {r1-r3} + and r0, r2, #0xff + add r0, r0, r4 + mov lr, pc + ldr pc, [sp, #(3+0)*4] + ldr r0, [sp, #(3+2)*4] @ &Pico.est + ldmfd sp!, {r1-r3} + ldr r0, [r0, #OFS_EST_DrawLineDest] +.endif +.endm + +.macro call_scan_end cond +.if \cond + stmfd sp!, {r0-r3} + and r0, r2, #0xff + add r0, r0, r4 + mov lr, pc + ldr pc, [sp, #(4+1)*4] + ldr r1, [sp, #(4+2)*4] @ &Pico.est + ldr r0, [r1, #OFS_EST_DrawLineDest] + ldr r2, [r1, #OFS_EST_DrawLineDestIncr] + add r0, r0, r2 + str r0, [r1, #OFS_EST_DrawLineDest] + ldmfd sp!, {r0-r3} +.endif +.endm + +@ direct color +@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg +.macro make_do_loop_dc name call_scan do_md +.global \name +\name: + stmfd sp!, {r4-r11,lr} + + PIC_LDR(lr, r9, Pico) + PIC_LDR(r10,r9, Pico32x) + ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] + ldrh r10,[r10, #0x40] @ Pico32x.vdp_regs[0] + add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd + + and r4, r2, #0xff + mov r5, #328 + mov r3, r3, lsl #26 @ mdbg << 26 + mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data + tst r10,#P32XV_PRI + movne r10,#0 + moveq r10,#0x8000 @ r10 = !inv_bit + call_scan_prep \call_scan lr + + mov r4, #0 @ line + b 1f @ loop_outer_entry + +0: @ loop_outer: + call_scan_end \call_scan + ldr r12, [r9, #OFS_EST_DrawLineDestIncr-OFS_EST_HighPal] + sub r0, r0, #320*2 + add r0, r0, r12 + add r4, r4, #1 + cmp r4, r2, lsr #16 + call_scan_fin_ge \call_scan + ldmgefd sp!, {r4-r11,pc} + +1: @ loop_outer_entry: + call_scan_begin \call_scan + mov 
r12,r4, lsl #1 + ldrh r12,[r1, r12] + add r11,r11,#8 + mov r6, #320/2 + add r5, r1, r12, lsl #1 @ p32x = dram + dram[l] + +2: @ loop_inner: +@ r4,r6 - counters; r5 - 32x data; r9 - md pal; r10 - inv_prio; r11 - md data +@ r7,r8,r12,lr - temp + ldrh r7, [r5], #2 + ldrh r8, [r5], #2 + subs r6, r6, #1 + blt 0b @ loop_outer + cmp r7, r8 + beq 5f @ check_fill + +3: @ no_fill: + ldrb r12,[r11], #1 @ MD pixel 0 + + eor r7, r7, r10 + and lr, r7, #0x03e0 @ convert BGR555 -> RGB565 + mov r7, r7, ror #5 + orr r7, r7, r7, ror #10+11 + orr r7, r7, lr, lsl #1+16 + eor r8, r8, r10 + and lr, r8, #0x03e0 + mov r8, r8, ror #5 + orr r8, r8, r8, ror #10+11 + orr r8, r8, lr, lsl #1+16 + + ldrb lr, [r11], #1 @ MD pixel 1 + +.if \do_md + cmp r3, r12, lsl #26 + tstne r7, #0x20<<16 + movne r12,r12, lsl #1 @ load MD color if no 32X prio and not bg + ldrneh r12,[r9, r12] + moveq r12,r7, lsr #16 @ else replace with 32X color + + cmp r3, lr, lsl #26 + tstne r8, #0x20<<16 + movne lr, lr, lsl #1 @ load MD color if no 32X prio and not bg + ldrneh lr, [r9, lr] + moveq lr, r8, lsr #16 @ else replace with 32X color + + orr r12,r12, lr, lsl #16 @ combine 2 pixels to optimize memory bandwidth + str r12,[r0], #4 @ (no write combining on ARM9) +.else + cmp r3, r12, lsl #26 @ replace MD bg info into prio bit + tstne r7, #0x20<<16 + moveq r7, r7, lsr #16 + streqh r7, [r0, #0] + + cmp r3, lr, lsl #26 + tstne r8, #0x20<<16 + moveq r8, r8, lsr #16 + streqh r8, [r0, #2] + + add r0, r0, #4 @ store 32x pixels if 32X prio or MD bg +.endif + b 2b @ loop_inner + +5: @ check_fill: + @ count pixels, align if needed + ldrh r12,[r5, #0] @ only do this for at least 4 pixels + ldrh lr ,[r5, #2] + cmp r12,r7 + cmpeq lr ,r7 + bne 3b @ no_fill + add r5, r5, #4 @ adjust for the check above + + sub lr, r5, #4+4 @ starting r5 (32x render data start) + add r6, r6, #1 @ restore from dec +6: @ count_loop: + sub r12,r5, lr @ loop checks 2 pixels + ldrh r8, [r5], #2 + cmp r12,r6, lsl #2 + ldrh r12,[r5], #2 + bge 7f @ count_done + 
cmp r8, r7 + cmpeq r12,r7 + beq 6b + +7: @ count_done: + sub r5, r5, #4 @ undo readahead + + sub r8, r5, lr @ pixel count + mov r8, r8, lsr #1 + + cmp r8, r6, lsl #1 @ limit count to line length + movgt r8, r6, lsl #1 + sub r6, r6, r8, lsr #1 @ consume pixels + + eor r7, r7, r10 + and r12,r7, #0x03e0 @ convert BGR555 -> RGB565 + mov r7, r7, ror #5 + orr r7, r7, r7, ror #10+11 + orr r7, r7, r12,lsl #1+16 + mov r7, r7, lsr #16 + + tst r7, #0x20 @ check for prio transfer + bne 9f @ bg_loop + + add r11,r11,r8 @ consume md pixels (not used) + orr r12,r7, r7, lsl #16 + mov r7 ,r12 +8: @ 32x_loop: + subs r8, r8, #4 @ store 4 pixels + stmgeia r0!, {r7, r12} + bgt 8b @ 32x_loop + beq 2b @ loop_inner + adds r8, r8, #2 + strge r7, [r0], #4 @ store 2 leftover pixels + b 2b @ loop_inner + +9: @ bg_loop: + ldrb r12,[r11],#1 @ MD pixel 0,1 + ldrb lr, [r11],#1 +.if \do_md + cmp r3, r12,lsl #26 @ MD pixel 0 has bg? + mov r12,r12,lsl #1 + ldrneh r12,[r9, r12] @ t = palmd[*pmd] + moveq r12,r7 + cmp r3, lr, lsl #26 @ MD pixel 1 has bg? + mov lr, lr, lsl #1 + ldrneh lr, [r9, lr] + moveq lr, r7 + orr r12,r12,lr, lsl #16 @ combine 2 pixels to optimize memory bandwidth + str r12,[r0], #4 @ (no write combining on ARM9) +.else + add r0, r0, #4 + cmp r3, r12,lsl #26 @ MD pixel 0 has bg? + streqh r7, [r0, #-4] + cmp r3, lr, lsl #26 @ MD pixel 1 has bg? + streqh r7, [r0, #-2] +.endif + subs r8, r8, #2 + bgt 9b @ bg_loop + b 2b @ loop_inner +.endm + + +@ packed pixel +@ note: this may read a few bytes over the end of PicoDraw2FB and dram, +@ so those should have a bit more alloc'ed than really needed. 
+@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg +.macro make_do_loop_pp name call_scan do_md +.global \name +\name: + stmfd sp!, {r4-r11,lr} + + PIC_LDR(lr, r9, Pico) + PIC_LDR(r10,r9, Pico32xMem) + ldr r9, =OFS_PMEM32x_pal_native + ldr r10, [r10] + ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] + add r10,r10,r9 + add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd + + and r4, r2, #0xff + mov r5, #328 + mov r3, r3, lsl #26 @ mdbg << 26 + mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data + call_scan_prep \call_scan lr + + mov r4, #0 @ line + b 1f @ loop_outer_entry + +0: @ loop_outer: + call_scan_end \call_scan + ldr r12, [r9, #OFS_EST_DrawLineDestIncr-OFS_EST_HighPal] + sub r0, r0, #320*2 + add r0, r0, r12 + add r4, r4, #1 + cmp r4, r2, lsr #16 + call_scan_fin_ge \call_scan + ldmgefd sp!, {r4-r11,pc} + +1: @ loop_outer_entry: + call_scan_begin \call_scan + mov r12,r4, lsl #1 + ldrh r12,[r1, r12] + add r11,r11,#8 + mov r6, #320/2 + add r5, r1, r12, lsl #1 @ p32x = dram + dram[l] + and r12,r2, #0x100 @ shift + add r5, r5, r12,lsr #8 + +2: @ loop_inner: +@ r4,r6 - counters; r5 - 32x data; r9,r10 - md,32x pal; r11 - md data +@ r7,r8,r12,lr - temp + tst r5, #1 + ldreqb r8, [r5], #2 + ldrb r7, [r5, #-1] + ldrneb r8, [r5, #2]! 
@ r7,r8 - 32X pixel 0,1 + subs r6, r6, #1 + blt 0b @ loop_outer + cmp r7, r8 + beq 5f @ check_fill + +3: @ no_fill: + ldrb r12,[r11], #1 @ MD pixel 0 + ldrb lr, [r11], #1 @ MD pixel 1 + + mov r7, r7, lsl #1 + mov r8, r8, lsl #1 + ldrh r7, [r10,r7] @ 32X color 0 + ldrh r8, [r10,r8] @ 32X color 1 + +.if \do_md + cmp r3, r12, lsl #26 + movne r12,r12, lsl #1 @ load MD color if not bg + ldrneh r12,[r9, r12] + orreq r7, r7, #0x20 @ accumulate MD bg info into prio bit + cmp r3, lr, lsl #26 + movne lr, lr, lsl #1 + ldrneh lr, [r9, lr] + orreq r8, r8, #0x20 + + tst r7, #0x20 @ replace 32X with MD color if no prio and not bg + moveq r7, r12 + tst r8, #0x20 + moveq r8, lr + orr r7, r7, r8, lsl #16 @ combine 2 pixels to optimize memory bandwidth + str r7, [r0], #4 @ (no write combining on ARM9) +.else + cmp r3, r12, lsl #26 @ replace MD bg info into prio bit + orreq r7, r7, #0x20 + cmp r3, lr, lsl #26 + orreq r8, r8, #0x20 + + add r0, r0, #4 @ store 32x pixels if 32X prio or MD bg + tst r7, #0x20 + strneh r7, [r0, #-4] + tst r8, #0x20 + strneh r8, [r0, #-2] +.endif + b 2b @ loop_inner + +5: @ check_fill: + @ count pixels, align if needed + bic r12,r5, #1 + ldrh r12,[r12, #0] @ only do this for at least 4 pixels + orr lr, r7, r7, lsl #8 + cmp r12,lr + bne 3b @ no_fill + add r5, r5, #2 @ adjust for the check above + + sub lr, r5, #4 @ starting r5 (32x render data start) + bic r5, r5, #1 + add r6, r6, #1 @ restore from dec + orr r7, r7, r7, lsl #8 +6: @ count_loop: + sub r12,r5, lr @ loop checks 4 pixels + ldrh r8, [r5], #2 + cmp r12,r6, lsl #1 + ldrh r12,[r5], #2 + bge 7f @ count_done + cmp r8, r7 + cmpeq r12,r7 + beq 6b + cmp r8, r7 + addeq r5, r5, #2 @ adjust if 2 pixels where ok + +7: @ count_done: + sub r5, r5, #4 @ undo readahead + + tst lr, #1 @ fix alignment and calculate count + subne r5, r5, #1 + sub r8, r5, lr + + and r7, r7, #0xff @ 32x pixel color + mov r7, r7, lsl #1 + ldrh r7, [r10,r7] + + cmp r8, r6, lsl #1 @ limit count to line length + movgt r8, r6, lsl #1 + sub 
r6, r6, r8, lsr #1 @ consume pixels + + tst r7, #0x20 @ check for prio transfer + beq 9f @ bg_loop + + add r11,r11,r8 @ consume md pixels (not used) + orr r12,r7, r7, lsl #16 + mov r7 ,r12 +8: @ 32x_loop: + subs r8, r8, #4 @ store 4 pixels + stmgeia r0!, {r7, r12} + bgt 8b @ 32x_loop + beq 2b @ loop_inner + adds r8, r8, #2 + strge r7, [r0], #4 @ store 2 leftover pixels + b 2b @ loop_inner + +9: @ bg_loop: + ldrb r12,[r11],#1 @ MD pixel 0,1 + ldrb lr, [r11],#1 +.if \do_md + cmp r3, r12,lsl #26 @ MD pixel 0 has bg? + mov r12,r12,lsl #1 + ldrneh r12,[r9, r12] @ t = palmd[*pmd] + moveq r12,r7 + cmp r3, lr, lsl #26 @ MD pixel 1 has bg? + mov lr, lr, lsl #1 + ldrneh lr, [r9, lr] + moveq lr, r7 + orr r12,r12,lr, lsl #16 @ combine 2 pixels to optimize memory bandwidth + str r12,[r0], #4 @ (no write combining on ARM9) +.else + add r0, r0, #4 + cmp r3, r12,lsl #26 @ MD pixel 0 has bg? + streqh r7, [r0, #-4] + cmp r3, lr, lsl #26 @ MD pixel 1 has bg? + streqh r7, [r0, #-2] +.endif + subs r8, r8, #2 + bgt 9b @ bg_loop + b 2b @ loop_inner +.endm + + +@ run length +@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg +.macro make_do_loop_rl name call_scan do_md +.global \name +\name: + stmfd sp!, {r4-r11,lr} + + PIC_LDR(lr, r9, Pico) + PIC_LDR(r10,r9, Pico32xMem) + ldr r9, =OFS_PMEM32x_pal_native + ldr r10, [r10] + ldr r11, [lr, #OFS_Pico_est+OFS_EST_Draw2FB] + add r10,r10,r9 + add r9, lr, #OFS_Pico_est+OFS_EST_HighPal @ palmd + + and r4, r2, #0xff + mov r5, #328 + mov r3, r3, lsl #26 @ mdbg << 26 + mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data + call_scan_prep \call_scan lr + + mov r4, #0 @ line + b 1f @ loop_outer_entry + +0: @ loop_outer: + call_scan_end \call_scan + ldr r12, [r9, #OFS_EST_DrawLineDestIncr-OFS_EST_HighPal] + sub r0, r0, #320*2 + add r0, r0, r12 + add r4, r4, #1 + cmp r4, r2, lsr #16 + call_scan_fin_ge \call_scan + ldmgefd sp!, {r4-r11,pc} + +1: @ loop_outer_entry: + call_scan_begin \call_scan + mov r12,r4, lsl #1 + ldrh 
r12,[r1, r12] + add r11,r11,#8 + mov r6, #320 + add r5, r1, r12, lsl #1 @ p32x = dram + dram[l] + +2: @ loop_inner: + ldrh r8, [r5], #2 @ control word + and r12,r8, #0xff + mov r12,r12,lsl #1 + ldrh lr, [r10,r12] @ t = 32x pixel + eor lr, lr, #0x20 + +3: @ loop_innermost: + subs r6, r6, #1 + ldrgeb r7, [r11], #1 @ MD pixel + blt 0b @ loop_outer + tst lr, #0x20 + cmpne r3, r7, lsl #26 @ MD has bg pixel? +.if \do_md + mov r7, r7, lsl #1 + ldrneh r12,[r9, r7] @ t = palmd[*pmd] + streqh lr, [r0], #2 + strneh r12,[r0], #2 @ *dst++ = t +.else + streqh lr, [r0] + add r0, r0, #2 +.endif + subs r8, r8, #0x100 + bge 3b @ loop_innermost + b 2b @ loop_inner +.endm + + +make_do_loop_dc do_loop_dc, 0, 0 +make_do_loop_dc do_loop_dc_md, 0, 1 +make_do_loop_dc do_loop_dc_scan, 1, 0 +make_do_loop_dc do_loop_dc_scan_md, 1, 1 +.pool + +make_do_loop_pp do_loop_pp, 0, 0 +make_do_loop_pp do_loop_pp_md, 0, 1 +make_do_loop_pp do_loop_pp_scan, 1, 0 +make_do_loop_pp do_loop_pp_scan_md, 1, 1 +.pool + +make_do_loop_rl do_loop_rl, 0, 0 +make_do_loop_rl do_loop_rl_md, 0, 1 +make_do_loop_rl do_loop_rl_scan, 1, 0 +make_do_loop_rl do_loop_rl_scan_md, 1, 1 +.pool + +@ vim:filetype=armasm diff --git a/pico/32x/draw_arm.s b/pico/32x/draw_arm.s deleted file mode 100644 index ba66fbf1..00000000 --- a/pico/32x/draw_arm.s +++ /dev/null @@ -1,373 +0,0 @@ -@* -@* PicoDrive -@* (C) notaz, 2010 -@* -@* This work is licensed under the terms of MAME license. -@* See COPYING file in the top-level directory. 
-@* - -.extern Pico32x -.extern PicoDraw2FB -.extern HighPal - -.equiv P32XV_PRI, (1<< 7) - -.bss -.align 2 -.global Pico32xNativePal -Pico32xNativePal: - .word 0 - -.text -.align 2 - - -.macro call_scan_prep cond -.if \cond - ldr r4, =PicoScan32xBegin - ldr r5, =PicoScan32xEnd - ldr r6, =DrawLineDest - ldr r4, [r4] - ldr r5, [r5] - stmfd sp!, {r4,r5,r6} -.endif -.endm - -.macro call_scan_fin_ge cond -.if \cond - addge sp, sp, #4*3 -.endif -.endm - -.macro call_scan_begin cond -.if \cond - stmfd sp!, {r1-r3} - and r0, r2, #0xff - add r0, r0, r4 - mov lr, pc - ldr pc, [sp, #(3+0)*4] - ldr r0, [sp, #(3+2)*4] @ &DrawLineDest - ldmfd sp!, {r1-r3} - ldr r0, [r0] -.endif -.endm - -.macro call_scan_end cond -.if \cond - stmfd sp!, {r0-r3} - and r0, r2, #0xff - add r0, r0, r4 - mov lr, pc - ldr pc, [sp, #(4+1)*4] - ldmfd sp!, {r0-r3} -.endif -.endm - -@ direct color -@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg -.macro make_do_loop_dc name call_scan do_md -.global \name -\name: - stmfd sp!, {r4-r11,lr} - - ldr r10,=Pico32x - ldr r11,=PicoDraw2FB - ldr r10,[r10, #0x40] @ Pico32x.vdp_regs[0] - ldr r11,[r11] - ldr r9, =HighPal @ palmd - and r4, r2, #0xff - mov r5, #328 - lsl r3, #26 @ mdbg << 26 - mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data - tst r10,#P32XV_PRI - moveq r10,#0 - movne r10,#0x8000 @ r10 = inv_bit - call_scan_prep \call_scan - - mov r4, #0 @ line - b 1f @ loop_outer_entry - -0: @ loop_outer: - call_scan_end \call_scan - add r4, r4, #1 - sub r11,r11,#1 @ adjust for prev read - cmp r4, r2, lsr #16 - call_scan_fin_ge \call_scan - ldmgefd sp!, {r4-r11,pc} - -1: @ loop_outer_entry: - call_scan_begin \call_scan - mov r12,r4, lsl #1 - ldrh r12,[r1, r12] - add r11,r11,#8 - mov r6, #320 - add r5, r1, r12, lsl #1 @ p32x = dram + dram[l] - -2: @ loop_inner: - ldrb r7, [r11], #1 @ MD pixel - subs r6, r6, #1 - blt 0b @ loop_outer - ldrh r8, [r5], #2 @ 32x pixel - cmp r3, r7, lsl #26 @ MD has bg pixel? 
- beq 3f @ draw32x - eor r12,r8, r10 - ands r12,r12,#0x8000 @ !((t ^ inv) & 0x8000) -.if \do_md - mov r7, r7, lsl #1 - ldreqh r12,[r9, r7] - streqh r12,[r0], #2 @ *dst++ = palmd[*pmd] -.endif - beq 2b @ loop_inner - -3: @ draw32x: - and r12,r8, #0x03e0 - mov r8, r8, lsl #11 - orr r8, r8, r8, lsr #(10+11) - orr r8, r8, r12,lsl #1 - bic r8, r8, #0x0020 @ kill prio bit - strh r8, [r0], #2 @ *dst++ = bgr2rgb(*p32x++) - b 2b @ loop_inner -.endm - - -@ packed pixel -@ note: this may read a few bytes over the end of PicoDraw2FB and dram, -@ so those should have a bit more alloc'ed than really needed. -@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg -.macro make_do_loop_pp name call_scan do_md -.global \name -\name: - stmfd sp!, {r4-r11,lr} - - ldr r11,=PicoDraw2FB - ldr r10,=Pico32xNativePal - ldr r11,[r11] - ldr r10,[r10] - ldr r9, =HighPal @ palmd - and r4, r2, #0xff - mov r5, #328 - lsl r3, #26 @ mdbg << 26 - mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data - call_scan_prep \call_scan - - mov r4, #0 @ line - b 1f @ loop_outer_entry - -0: @ loop_outer: - call_scan_end \call_scan - add r4, r4, #1 - cmp r4, r2, lsr #16 - call_scan_fin_ge \call_scan - ldmgefd sp!, {r4-r11,pc} - -1: @ loop_outer_entry: - call_scan_begin \call_scan - mov r12,r4, lsl #1 - ldrh r12,[r1, r12] - add r11,r11,#8 - mov r6, #320/2 - add r5, r1, r12, lsl #1 @ p32x = dram + dram[l] - and r12,r2, #0x100 @ shift - add r5, r5, r12,lsr #8 - -2: @ loop_inner: -@ r4,r6 - counters; r5 - 32x data; r9,r10 - md,32x pal; r11 - md data -@ r7,r8,r12,lr - temp - tst r5, #1 - ldreqb r8, [r5], #2 - ldrb r7, [r5, #-1] - ldrneb r8, [r5, #2]! 
@ r7,r8 - pixel 0,1 index - subs r6, r6, #1 - blt 0b @ loop_outer - cmp r7, r8 - beq 5f @ check_fill @ +8 - -3: @ no_fill: - mov r12,r7, lsl #1 - mov lr, r8, lsl #1 - ldrh r7, [r10,r12] - ldrh r8, [r10,lr] - add r11,r11,#2 - - eor r12,r7, #0x20 - tst r12,#0x20 - ldrneb r12,[r11,#-2] @ MD pixel 0 - eor lr, r8, #0x20 - cmpne r3, r12, lsl #26 @ MD has bg pixel? -.if \do_md - mov r12,r12,lsl #1 - ldrneh r7, [r9, r12] @ t = palmd[pmd[0]] - tst lr, #0x20 - ldrneb lr, [r11,#-1] @ MD pixel 1 - strh r7, [r0], #2 - cmpne r3, lr, lsl #26 @ MD has bg pixel? - mov lr, lr, lsl #1 - ldrneh r8, [r9, lr] @ t = palmd[pmd[1]] - strh r8, [r0], #2 -.else - streqh r7, [r0] - tst lr, #0x20 - ldrneb lr, [r11,#-1] @ MD pixel 1 - add r0, r0, #4 - cmpne r3, lr, lsl #26 @ MD has bg pixel? - streqh r8, [r0, #-2] -.endif - b 2b @ loop_inner - -5: @ check_fill - @ count pixels, align if needed - bic r12,r5, #1 - ldrh r12,[r12] - orr lr, r7, r7, lsl #8 - cmp r12,lr - bne 3b @ no_fill - - tst r5, #1 - sub lr, r5, #2 @ starting r5 (32x render data start) - addeq r5, r5, #2 - addne r5, r5, #1 @ add for the check above - add r6, r6, #1 @ restore from dec - orr r7, r7, r7, lsl #8 -6: - sub r12,r5, lr - ldrh r8, [r5], #2 - cmp r12,r6, lsl #1 - ldrh r12,[r5], #2 - bge 7f @ count_done - cmp r8, r7 - cmpeq r12,r7 - beq 6b - -7: @ count_done - sub r5, r5, #4 @ undo readahead - - @ fix alignment and check type - sub r8, r5, lr - tst r8, #1 - subne r5, r5, #1 - subne r8, r8, #1 - - and r7, r7, #0xff - cmp r8, r6, lsl #1 - mov r7, r7, lsl #1 - movgt r8, r6, lsl #1 @ r8=count - ldrh r7, [r10,r7] - sub r6, r6, r8, lsr #1 @ adjust counter - tst r7, #0x20 - beq 9f @ bg_mode - - add r11,r11,r8 -8: - subs r8, r8, #2 - strgeh r7, [r0], #2 - strgeh r7, [r0], #2 - bgt 8b - b 2b @ loop_inner - -9: @ bg_mode: - ldrb r12,[r11],#1 @ MD pixel - ldrb lr, [r11],#1 - cmp r3, lr, lsl #26 @ MD has bg pixel? 
-.if \do_md - mov r12,r12,lsl #1 - ldrneh r12,[r9, r12] @ t = palmd[*pmd] - moveq r12,r7 - cmp r3, lr, lsl #26 - mov lr, lr, lsl #1 - ldrneh lr, [r9, lr] - moveq lr, r7 - strh r12,[r0], #2 - strh lr, [r0], #2 -.else - streqh r7, [r0] - cmp r3, lr, lsl #26 - streqh r7, [r0, #2] - add r0, r0, #4 -.endif - subs r8, r8, #2 - bgt 9b @ bg_mode - b 2b @ loop_inner -.endm - - -@ run length -@ unsigned short *dst, unsigned short *dram, int lines_sft_offs, int mdbg -.macro make_do_loop_rl name call_scan do_md -.global \name -\name: - stmfd sp!, {r4-r11,lr} - - ldr r11,=PicoDraw2FB - ldr r10,=Pico32xNativePal - ldr r11,[r11] - ldr r10,[r10] - ldr r9, =HighPal @ palmd - and r4, r2, #0xff - mov r5, #328 - lsl r3, #26 @ mdbg << 26 - mla r11,r4,r5,r11 @ r11 = pmd = PicoDraw2FB + offs*328: md data - call_scan_prep \call_scan - - mov r4, #0 @ line - b 1f @ loop_outer_entry - -0: @ loop_outer: - call_scan_end \call_scan - add r4, r4, #1 - sub r11,r11,#1 @ adjust for prev read - cmp r4, r2, lsr #16 - call_scan_fin_ge \call_scan - ldmgefd sp!, {r4-r11,pc} - -1: @ loop_outer_entry: - call_scan_begin \call_scan - mov r12,r4, lsl #1 - ldrh r12,[r1, r12] - add r11,r11,#8 - mov r6, #320 - add r5, r1, r12, lsl #1 @ p32x = dram + dram[l] - -2: @ loop_inner: - ldrh r8, [r5], #2 @ control word - and r12,r8, #0xff - mov r12,r12,lsl #1 - ldrh lr, [r10,r12] @ t = 32x pixel - eor lr, lr, #0x20 - -3: @ loop_innermost: - ldrb r7, [r11], #1 @ MD pixel - subs r6, r6, #1 - blt 0b @ loop_outer - cmp r3, r7, lsl #26 @ MD has bg pixel? 
- mov r7, r7, lsl #1 - tstne lr, #0x20 -.if \do_md - ldrneh r12,[r9, r7] @ t = palmd[*pmd] - streqh lr, [r0], #2 - strneh r12,[r0], #2 @ *dst++ = t -.else - streqh lr, [r0] - add r0, r0, #2 -.endif - subs r8, r8, #0x100 - bge 3b @ loop_innermost - b 2b @ loop_inner -.endm - - -make_do_loop_dc do_loop_dc, 0, 0 -make_do_loop_dc do_loop_dc_md, 0, 1 -make_do_loop_dc do_loop_dc_scan, 1, 0 -make_do_loop_dc do_loop_dc_scan_md, 1, 1 - -make_do_loop_pp do_loop_pp, 0, 0 -make_do_loop_pp do_loop_pp_md, 0, 1 -make_do_loop_pp do_loop_pp_scan, 1, 0 -make_do_loop_pp do_loop_pp_scan_md, 1, 1 - -make_do_loop_rl do_loop_rl, 0, 0 -make_do_loop_rl do_loop_rl_md, 0, 1 -make_do_loop_rl do_loop_rl_scan, 1, 0 -make_do_loop_rl do_loop_rl_scan_md, 1, 1 - -@ vim:filetype=armasm diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 8b9254f0..46cee678 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2009,2010,2013 + * (C) irixxxx, 2019-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
@@ -40,45 +41,58 @@ */ #include "../pico_int.h" #include "../memory.h" -#include "../../cpu/sh2/compiler.h" + +#include +DRC_DECLARE_SR; static const char str_mars[] = "MARS"; void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; struct Pico32xMem *Pico32xMem; -static void bank_switch(int b); +static void bank_switch_rom_68k(int b); + +static void (*m68k_write8_io)(u32 a, u32 d); +static void (*m68k_write16_io)(u32 a, u32 d); // addressing byte in 16bit reg -#define REG8IN16(ptr, offs) ((u8 *)ptr)[(offs) ^ 1] +#define REG8IN16(ptr, offs) ((u8 *)ptr)[MEM_BE2(offs)] // poll detection -#define POLL_THRESHOLD 3 +#define POLL_THRESHOLD 11 // Primal Rage speed, Blackthorne intro static struct { - u32 addr, cycles; + u32 addr1, addr2, cycles; int cnt; } m68k_poll; static int m68k_poll_detect(u32 a, u32 cycles, u32 flags) { int ret = 0; + // support polling on 2 addresses - seen in Wolfenstein + int match = (a - m68k_poll.addr1 <= 3 || a - m68k_poll.addr2 <= 3); - if (a - 2 <= m68k_poll.addr && m68k_poll.addr <= a + 2 - && cycles - m68k_poll.cycles <= 64 && !SekNotPolling) + if (match && CYCLES_GT(64, cycles - m68k_poll.cycles) && !SekNotPolling) { - if (m68k_poll.cnt++ > POLL_THRESHOLD) { + // detect split 32bit access by same cycle count, and ignore those + if (cycles != m68k_poll.cycles && ++m68k_poll.cnt >= POLL_THRESHOLD) { if (!(Pico32x.emu_flags & flags)) { elprintf(EL_32X, "m68k poll addr %08x, cyc %u", a, cycles - m68k_poll.cycles); - ret = 1; } Pico32x.emu_flags |= flags; + ret = 1; } } else { + // reset poll state in case of restart by interrupt + Pico32x.emu_flags &= ~(P32XF_68KCPOLL|P32XF_68KVPOLL); + SekSetStop(0); m68k_poll.cnt = 0; - m68k_poll.addr = a; + if (!match) { + m68k_poll.addr2 = m68k_poll.addr1; + m68k_poll.addr1 = a & ~1; + } SekNotPolling = 0; } m68k_poll.cycles = cycles; @@ -86,64 +100,215 @@ static int m68k_poll_detect(u32 a, u32 cycles, u32 flags) return ret; } -void p32x_m68k_poll_event(u32 flags) +void p32x_m68k_poll_event(u32 a, u32 flags) { - if 
(Pico32x.emu_flags & flags) { + int match = (a - m68k_poll.addr1 <= 3 || a - m68k_poll.addr2 <= 3); + + if ((Pico32x.emu_flags & flags) && match) { elprintf(EL_32X, "m68k poll %02x -> %02x", Pico32x.emu_flags, Pico32x.emu_flags & ~flags); Pico32x.emu_flags &= ~flags; SekSetStop(0); } - m68k_poll.addr = m68k_poll.cnt = 0; + + if (!(Pico32x.emu_flags & (P32XF_68KCPOLL|P32XF_68KVPOLL))) + m68k_poll.addr1 = m68k_poll.addr2 = m68k_poll.cnt = 0; } -static void sh2_poll_detect(SH2 *sh2, u32 a, u32 flags, int maxcnt) +void NOINLINE p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt) { - int cycles_left = sh2_cycles_left(sh2); + u32 cycles_done = sh2_cycles_done_t(sh2); + u32 cycles_diff = cycles_done - sh2->poll_cycles; - if (a == sh2->poll_addr && sh2->poll_cycles - cycles_left <= 10) { - if (sh2->poll_cnt++ > maxcnt) { + a &= ~0x20000000; + // reading 2 consecutive 16bit values is probably a 32bit access. detect this + // by checking address (max 2 bytes away) and cycles (max 2 cycles later). + // no polling if more than 20 cycles have passed since last detect call. 
+ if (a - sh2->poll_addr <= 3 && CYCLES_GE(20, cycles_diff)) { + if (!sh2_not_polling(sh2) && CYCLES_GT(cycles_diff, 2) && + ++sh2->poll_cnt >= maxcnt) { if (!(sh2->state & flags)) elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state | flags); sh2->state |= flags; - sh2_end_run(sh2, 1); + sh2_end_run(sh2, 0); pevt_log_sh2(sh2, EVT_POLL_START); - return; +#ifdef DRC_SH2 + // mark this as an address used for polling if SDRAM + if ((a & 0xc6000000) == 0x06000000) { + unsigned char *p = sh2->p_drcblk_ram; + p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] |= 0x80; + // mark next word too to enable poll fifo for 32bit access + p[((a+2) & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] |= 0x80; + } +#endif } } - else + else if (!(sh2->state & (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) { sh2->poll_cnt = 0; - sh2->poll_addr = a; - sh2->poll_cycles = cycles_left; + sh2->poll_addr = a & ~1; + } + sh2->poll_cycles = cycles_done; + sh2_set_polling(sh2); } -void p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) +void NOINLINE p32x_sh2_poll_event(u32 a, SH2 *sh2, u32 flags, u32 m68k_cycles) { - if (sh2->state & flags) { + a &= ~0x20000000; + if ((sh2->state & flags) && a - sh2->poll_addr <= 3) { elprintf_sh2(sh2, EL_32X, "state: %02x->%02x", sh2->state, sh2->state & ~flags); - if (sh2->m68krcycles_done < m68k_cycles) + if (CYCLES_GT(m68k_cycles, sh2->m68krcycles_done) && !(sh2->state & SH2_STATE_RUN)) sh2->m68krcycles_done = m68k_cycles; pevt_log_sh2_o(sh2, EVT_POLL_END); + sh2->state &= ~flags; } - sh2->state &= ~flags; - sh2->poll_addr = sh2->poll_cycles = sh2->poll_cnt = 0; + if (!(sh2->state & (SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) + sh2->poll_addr = sh2->poll_cycles = sh2->poll_cnt = 0; } -static void sh2s_sync_on_read(SH2 *sh2) +static NOINLINE void sh2s_sync_on_read(SH2 *sh2, unsigned cycles) { - int cycles; if (sh2->poll_cnt != 0) return; - cycles = sh2_cycles_done(sh2); - if (cycles > 600) - p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + 
cycles / 3); + if (p32x_sh2_ready(sh2->other_sh2, cycles-250)) + p32x_sync_other_sh2(sh2, cycles); +} + +// poll fifo, stores writes to potential addresses used for polling. +// This is used to correctly deliver synchronisation data to the 3 cpus. The +// fifo stores 16 bit values, 8/32 bit accesses must be adapted accordingly. +#define PFIFO_SZ 4 +#define PFIFO_CNT 8 +struct sh2_poll_fifo { + u32 cycles; + u32 a; + u16 d; + int cpu; +} sh2_poll_fifo[PFIFO_CNT][PFIFO_SZ]; +unsigned sh2_poll_rd[PFIFO_CNT], sh2_poll_wr[PFIFO_CNT]; // ringbuffer pointers + +static NOINLINE u32 sh2_poll_read(u32 a, u32 d, unsigned int cycles, SH2* sh2) +{ + int hix = (a >> 1) % PFIFO_CNT; + struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; + struct sh2_poll_fifo *p; + int cpu = sh2 ? sh2->is_slave : -1; + unsigned idx; + + a &= ~0x20000000; // ignore writethrough bit + // fetch oldest write to address from fifo, but stop when reaching the present + idx = sh2_poll_rd[hix]; + while (idx != sh2_poll_wr[hix] && CYCLES_GE(cycles, fifo[idx].cycles)) { + p = &fifo[idx]; + idx = (idx+1) % PFIFO_SZ; + + if (cpu != p->cpu) { + if (CYCLES_GT(cycles, p->cycles+60)) { // ~180 sh2 cycles, Spiderman + // drop older fifo stores that may cause synchronisation problems. + p->a = -1; + } else if (p->a == a) { + // replace current data with fifo value and discard fifo entry + d = p->d; + p->a = -1; + break; + } + } + } + return d; +} + +static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) +{ + int hix = (a >> 1) % PFIFO_CNT; + struct sh2_poll_fifo *fifo = sh2_poll_fifo[hix]; + struct sh2_poll_fifo *q; + int cpu = sh2 ? 
sh2->is_slave : -1; + unsigned rd = sh2_poll_rd[hix], wr = sh2_poll_wr[hix]; + unsigned idx, nrd; + + a &= ~0x20000000; // ignore writethrough bit + + // throw out any values written by other cpus, plus heading cancelled stuff + for (idx = nrd = wr; idx != rd; ) { + idx = (idx-1) % PFIFO_SZ; + q = &fifo[idx]; + if (q->a == a && q->cpu != cpu) { q->a = -1; } + if (q->a != -1) { nrd = idx; } + } + rd = nrd; + + // fold 2 consecutive writes to the same address to avoid reading of + // intermediate values that may cause synchronisation problems. + // NB this can take an eternity on m68k: mov.b , needs + // 28 m68k-cycles (~80 sh2-cycles) to complete (observed in Metal Head) + q = &fifo[(sh2_poll_wr[hix]-1) % PFIFO_SZ]; + if (rd != wr && q->a == a && !CYCLES_GT(cycles,q->cycles + (cpu<0 ? 30:4))) { + q->d = d; + } else { + // store write to poll address in fifo + fifo[wr] = + (struct sh2_poll_fifo){ .cycles = cycles, .a = a, .d = d, .cpu = cpu }; + wr = (wr+1) % PFIFO_SZ; + if (wr == rd) + // fifo overflow, discard oldest value + rd = (rd+1) % PFIFO_SZ; + } + + sh2_poll_rd[hix] = rd; sh2_poll_wr[hix] = wr; +} + +u32 REGPARM(3) p32x_sh2_poll_memory8(u32 a, u32 d, SH2 *sh2) +{ + int shift = (a & 1 ? 0 : 8); + d = (s8)(p32x_sh2_poll_memory16(a & ~1, d << shift, sh2) >> shift); + return d; +} + +u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2) +{ + unsigned char *p = sh2->p_drcblk_ram; + unsigned int cycles; + + DRC_SAVE_SR(sh2); + // is this a synchronisation address? 
+ if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) { + cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); + // check poll fifo and sign-extend the result correctly + d = (s16)sh2_poll_read(a, d, cycles, sh2); + } + + p32x_sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 7); + + DRC_RESTORE_SR(sh2); + return d; +} + +u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2) +{ + unsigned char *p = sh2->p_drcblk_ram; + unsigned int cycles; + + DRC_SAVE_SR(sh2); + // is this a synchronisation address? + if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) { + cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); + // check poll fifo for both 16 bit halves and merge them into the 32 bit result + d = (sh2_poll_read(a, d >> 16, cycles, sh2) << 16) | + ((u16)sh2_poll_read(a+2, d, cycles, sh2)); + } + + p32x_sh2_poll_detect(a, sh2, SH2_STATE_RPOLL, 7); + + DRC_RESTORE_SR(sh2); + return d; } // SH2 faking @@ -167,7 +332,7 @@ static u32 sh2_comm_faker(u32 a) static int f = 0; if (a == 0x28 && !p32x_csum_faked) { p32x_csum_faked = 1; - return *(unsigned short *)(Pico.rom + 0x18e); + return *(u16 *)(Pico.rom + 0x18e); } if (f >= sizeof(comm_fakevals) / sizeof(comm_fakevals[0])) f = 0; @@ -188,25 +353,19 @@ static u32 p32x_reg_read16(u32 a) #else if ((a & 0x30) == 0x20) { unsigned int cycles = SekCyclesDone(); - int comreg = 1 << (a & 0x0f) / 2; - if (cycles - msh2.m68krcycles_done > 244 - || (Pico32x.comm_dirty_68k & comreg)) + if (CYCLES_GT(cycles - msh2.m68krcycles_done, 244)) p32x_sync_sh2s(cycles); - if (Pico32x.comm_dirty_sh2 & comreg) - Pico32x.comm_dirty_sh2 &= ~comreg; - else if (m68k_poll_detect(a, cycles, P32XF_68KCPOLL)) { + if (m68k_poll_detect(a, cycles, P32XF_68KCPOLL)) SekSetStop(1); - SekEndRun(16); - } - goto out; + return sh2_poll_read(a, Pico32x.regs[a / 2], cycles, NULL); } #endif if (a == 2) { // INTM, INTS unsigned int cycles = SekCyclesDone(); - if (cycles - msh2.m68krcycles_done > 64) + if (CYCLES_GT(cycles - msh2.m68krcycles_done, 64)) 
p32x_sync_sh2s(cycles); goto out; } @@ -257,7 +416,10 @@ static void p32x_reg_write8(u32 a, u32 d) REG8IN16(r, 0x00) = d & 0x80; return; case 0x01: // adapter ctl: RES and ADEN writable - if ((d ^ r[0]) & d & P32XS_nRES) + if ((d ^ r[0]) & ~d & P32XS_ADEN) { + d |= P32XS_nRES; + Pico32xShutdown(); + } else if ((d ^ r[0]) & d & P32XS_nRES) p32x_reset_sh2s(); REG8IN16(r, 0x01) &= ~(P32XS_nRES|P32XS_ADEN); REG8IN16(r, 0x01) |= d & (P32XS_nRES|P32XS_ADEN); @@ -266,7 +428,7 @@ static void p32x_reg_write8(u32 a, u32 d) return; case 0x03: // irq ctl if ((d ^ r[0x02 / 2]) & 3) { - int cycles = SekCyclesDone(); + unsigned int cycles = SekCyclesDone(); p32x_sync_sh2s(cycles); r[0x02 / 2] = d & 3; p32x_update_cmd_irq(NULL, cycles); @@ -278,7 +440,7 @@ static void p32x_reg_write8(u32 a, u32 d) d &= 3; if (r[0x04 / 2] != d) { r[0x04 / 2] = d; - bank_switch(d); + bank_switch_rom_68k(d); } return; case 0x06: // ignored, always 0 @@ -341,6 +503,36 @@ static void p32x_reg_write8(u32 a, u32 d) case 0x1d: case 0x1e: case 0x1f: + return; + case 0x20: // comm port + case 0x21: + case 0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + { unsigned int cycles = SekCyclesDone(); + + if (CYCLES_GT(cycles - msh2.m68krcycles_done, 64)) + p32x_sync_sh2s(cycles); + + if (REG8IN16(r, a) != (u8)d) { + REG8IN16(r, a) = d; + p32x_sh2_poll_event(a, &sh2s[0], SH2_STATE_CPOLL, cycles); + p32x_sh2_poll_event(a, &sh2s[1], SH2_STATE_CPOLL, cycles); + sh2_poll_write(a & ~1, r[a / 2], cycles, NULL); + } + } + return; case 0x30: return; case 0x31: // PWM control @@ -380,27 +572,6 @@ static void p32x_reg_write8(u32 a, u32 d) p32x_pwm_write16(a & ~1, d, NULL, SekCyclesDone()); return; } - - if ((a & 0x30) == 0x20) { - int cycles = SekCyclesDone(); - int comreg; - - if (REG8IN16(r, a) == d) - return; - - comreg = 1 << (a & 0x0f) / 2; - if (Pico32x.comm_dirty_68k & comreg) - 
p32x_sync_sh2s(cycles); - - REG8IN16(r, a) = d; - p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); - p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - Pico32x.comm_dirty_68k |= comreg; - - if (cycles - (int)msh2.m68krcycles_done > 120) - p32x_sync_sh2s(cycles); - return; - } } static void p32x_reg_write16(u32 a, u32 d) @@ -411,66 +582,72 @@ static void p32x_reg_write16(u32 a, u32 d) // for things like bset on comm port m68k_poll.cnt = 0; - switch (a) { - case 0x00: // adapter ctl - if ((d ^ r[0]) & d & P32XS_nRES) + switch (a/2) { + case 0x00/2: // adapter ctl + if ((d ^ r[0]) & ~d & P32XS_ADEN) { + d |= P32XS_nRES; + Pico32xShutdown(); + } else if ((d ^ r[0]) & d & P32XS_nRES) p32x_reset_sh2s(); r[0] &= ~(P32XS_FM|P32XS_nRES|P32XS_ADEN); r[0] |= d & (P32XS_FM|P32XS_nRES|P32XS_ADEN); return; - case 0x08: // DREQ src + case 0x08/2: // DREQ src r[a / 2] = d & 0xff; return; - case 0x0a: + case 0x0a/2: r[a / 2] = d & ~1; return; - case 0x0c: // DREQ dest + case 0x0c/2: // DREQ dest r[a / 2] = d & 0xff; return; - case 0x0e: + case 0x0e/2: r[a / 2] = d; return; - case 0x10: // DREQ len + case 0x10/2: // DREQ len r[a / 2] = d & ~3; return; - case 0x12: // FIFO reg + case 0x12/2: // FIFO reg dreq0_write(r, d); return; - case 0x1a: // TV + mystery bit + case 0x1a/2: // TV + mystery bit r[a / 2] = d & 0x0101; return; - case 0x30: // PWM control + case 0x20/2: // comm port + case 0x22/2: + case 0x24/2: + case 0x26/2: + case 0x28/2: + case 0x2a/2: + case 0x2c/2: + case 0x2e/2: + { unsigned int cycles = SekCyclesDone(); + + if (CYCLES_GT(cycles - msh2.m68krcycles_done, 64)) + p32x_sync_sh2s(cycles); + + if (r[a / 2] != (u16)d) { + r[a / 2] = d; + p32x_sh2_poll_event(a, &sh2s[0], SH2_STATE_CPOLL, cycles); + p32x_sh2_poll_event(a, &sh2s[1], SH2_STATE_CPOLL, cycles); + sh2_poll_write(a, (u16)d, cycles, NULL); + } + } + return; + case 0x30/2: // PWM control d = (r[a / 2] & ~0x0f) | (d & 0x0f); r[a / 2] = d; p32x_pwm_write16(a, d, NULL, SekCyclesDone()); return; - } - 
- // comm port - if ((a & 0x30) == 0x20) { - int cycles = SekCyclesDone(); - int comreg; - - if (r[a / 2] == d) + case 0x32/2: + case 0x34/2: + case 0x36/2: + case 0x38/2: + case 0x3a/2: + case 0x3c/2: + case 0x3e/2: + p32x_pwm_write16(a, d, NULL, SekCyclesDone()); return; - - comreg = 1 << (a & 0x0f) / 2; - if (Pico32x.comm_dirty_68k & comreg) - p32x_sync_sh2s(cycles); - - r[a / 2] = d; - p32x_sh2_poll_event(&sh2s[0], SH2_STATE_CPOLL, cycles); - p32x_sh2_poll_event(&sh2s[1], SH2_STATE_CPOLL, cycles); - Pico32x.comm_dirty_68k |= comreg; - - if (cycles - (int)msh2.m68krcycles_done > 120) - p32x_sync_sh2s(cycles); - return; - } - // PWM - else if ((a & 0x30) == 0x30) { - p32x_pwm_write16(a, d, NULL, SekCyclesDone()); - return; } p32x_reg_write8(a + 1, d); @@ -518,12 +695,12 @@ static void p32x_vdp_write8(u32 a, u32 d) r[4 / 2] = d & 0xff; break; case 0x0b: - d &= 1; + d &= P32XV_FS; Pico32x.pending_fb = d; // if we are blanking and FS bit is changing if (((r[0x0a/2] & P32XV_VBLK) || (r[0] & P32XV_Mx) == 0) && ((r[0x0a/2] ^ d) & P32XV_FS)) { r[0x0a/2] ^= P32XV_FS; - Pico32xSwapDRAM(d ^ 1); + Pico32xSwapDRAM(d ^ P32XV_FS); elprintf(EL_32X, "VDP FS: %d", r[0x0a/2] & P32XV_FS); } break; @@ -548,7 +725,7 @@ static void p32x_vdp_write16(u32 a, u32 d, SH2 *sh2) } Pico32x.vdp_regs[0x06 / 2] = a; Pico32x.vdp_regs[0x08 / 2] = d; - if (sh2 != NULL && len > 4) { + if (sh2 != NULL && len > 8) { Pico32x.vdp_regs[0x0a / 2] |= P32XV_nFEN; // supposedly takes 3 bus/6 sh2 cycles? or 3 sh2 cycles? 
p32x_event_schedule_sh2(sh2, P32X_EVENT_FILLEND, 3 + len); @@ -565,25 +742,27 @@ static void p32x_vdp_write16(u32 a, u32 d, SH2 *sh2) static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) { u16 *r = Pico32x.regs; + unsigned cycles; a &= 0x3e; - switch (a) { - case 0x00: // adapter/irq ctl + switch (a/2) { + case 0x00/2: // adapter/irq ctl return (r[0] & P32XS_FM) | Pico32x.sh2_regs[0] | Pico32x.sh2irq_mask[sh2->is_slave]; - case 0x04: // H count (often as comm too) - sh2_poll_detect(sh2, a, SH2_STATE_CPOLL, 3); - sh2s_sync_on_read(sh2); - return Pico32x.sh2_regs[4 / 2]; - case 0x06: + case 0x04/2: // H count (often as comm too) + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 5); + cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); + return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], cycles, sh2); + case 0x06/2: return (r[a / 2] & ~P32XS_FULL) | 0x4000; - case 0x08: // DREQ src - case 0x0a: - case 0x0c: // DREQ dst - case 0x0e: - case 0x10: // DREQ len + case 0x08/2: // DREQ src + case 0x0a/2: + case 0x0c/2: // DREQ dst + case 0x0e/2: + case 0x10/2: // DREQ len return r[a / 2]; - case 0x12: // DREQ FIFO - does this work on hw? + case 0x12/2: // DREQ FIFO - does this work on hw? if (Pico32x.dmac0_fifo_ptr > 0) { Pico32x.dmac0_fifo_ptr--; r[a / 2] = Pico32x.dmac_fifo[0]; @@ -591,27 +770,35 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) Pico32x.dmac0_fifo_ptr * 2); } return r[a / 2]; - case 0x14: - case 0x16: - case 0x18: - case 0x1a: - case 0x1c: + case 0x14/2: + case 0x16/2: + case 0x18/2: + case 0x1a/2: + case 0x1c/2: return 0; // ? 
+ case 0x20/2: // comm port + case 0x22/2: + case 0x24/2: + case 0x26/2: + case 0x28/2: + case 0x2a/2: + case 0x2c/2: + case 0x2e/2: + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); + cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); + return sh2_poll_read(a, r[a / 2], cycles, sh2); + case 0x30/2: // PWM + case 0x32/2: + case 0x34/2: + case 0x36/2: + case 0x38/2: + case 0x3a/2: + case 0x3c/2: + case 0x3e/2: + return p32x_pwm_read16(a, sh2, sh2_cycles_done_m68k(sh2)); } - // comm port - if ((a & 0x30) == 0x20) { - int comreg = 1 << (a & 0x0f) / 2; - if (Pico32x.comm_dirty_68k & comreg) - Pico32x.comm_dirty_68k &= ~comreg; - else - sh2_poll_detect(sh2, a, SH2_STATE_CPOLL, 3); - sh2s_sync_on_read(sh2); - return r[a / 2]; - } - if ((a & 0x30) == 0x30) - return p32x_pwm_read16(a, sh2, sh2_cycles_done_m68k(sh2)); - elprintf_sh2(sh2, EL_32X|EL_ANOMALY, "unhandled sysreg r16 [%02x] @%08x", a, sh2_pc(sh2)); return 0; @@ -623,7 +810,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) u32 old; a &= 0x3f; - sh2->poll_addr = 0; + sh2->poll_cnt = 0; switch (a) { case 0x00: // FM @@ -639,7 +826,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) Pico32x.sh2_regs[0] &= ~0x80; Pico32x.sh2_regs[0] |= d & 0x80; - if ((d ^ old) & 1) + if ((old ^ d) & 1) p32x_pwm_schedule_sh2(sh2); if ((old ^ d) & 2) p32x_update_cmd_irq(sh2, 0); @@ -650,11 +837,40 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) return; case 0x05: // H count d &= 0xff; - if (Pico32x.sh2_regs[4 / 2] != d) { + if (Pico32x.sh2_regs[4 / 2] != (u8)d) { + unsigned int cycles = sh2_cycles_done_m68k(sh2); Pico32x.sh2_regs[4 / 2] = d; - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, - sh2_cycles_done_m68k(sh2)); - sh2_end_run(sh2, 4); + p32x_sh2_poll_event(a, sh2->other_sh2, SH2_STATE_CPOLL, cycles); + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) + sh2_end_run(sh2, 4); + sh2_poll_write(a & ~1, d, cycles, sh2); + } + return; + case 0x20: // comm port + case 0x21: + case 
0x22: + case 0x23: + case 0x24: + case 0x25: + case 0x26: + case 0x27: + case 0x28: + case 0x29: + case 0x2a: + case 0x2b: + case 0x2c: + case 0x2d: + case 0x2e: + case 0x2f: + if (REG8IN16(r, a) != (u8)d) { + unsigned int cycles = sh2_cycles_done_m68k(sh2); + + REG8IN16(r, a) = d; + p32x_m68k_poll_event(a, P32XF_68KCPOLL); + p32x_sh2_poll_event(a, sh2->other_sh2, SH2_STATE_CPOLL, cycles); + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) + sh2_end_run(sh2, 0); + sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); } return; case 0x30: @@ -694,24 +910,10 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) case 0x3f: return; pwm_write: - p32x_pwm_write16(a & ~1, d, sh2, 0); + p32x_pwm_write16(a & ~1, d, sh2, sh2_cycles_done_m68k(sh2)); return; } - if ((a & 0x30) == 0x20) { - int comreg; - if (REG8IN16(r, a) == d) - return; - - REG8IN16(r, a) = d; - p32x_m68k_poll_event(P32XF_68KCPOLL); - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, - sh2_cycles_done_m68k(sh2)); - comreg = 1 << (a & 0x0f) / 2; - Pico32x.comm_dirty_sh2 |= comreg; - return; - } - elprintf(EL_32X|EL_ANOMALY, "unhandled sysreg w8 [%02x] %02x @%08x", a, d, sh2_pc(sh2)); } @@ -720,51 +922,60 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) { a &= 0x3e; - sh2->poll_addr = 0; + sh2->poll_cnt = 0; - // comm - if ((a & 0x30) == 0x20) { - int comreg; - if (Pico32x.regs[a / 2] == d) - return; - - Pico32x.regs[a / 2] = d; - p32x_m68k_poll_event(P32XF_68KCPOLL); - p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, - sh2_cycles_done_m68k(sh2)); - comreg = 1 << (a & 0x0f) / 2; - Pico32x.comm_dirty_sh2 |= comreg; - return; - } - // PWM - else if ((a & 0x30) == 0x30) { - p32x_pwm_write16(a, d, sh2, sh2_cycles_done_m68k(sh2)); - return; - } - - switch (a) { - case 0: // FM + switch (a/2) { + case 0x00/2: // FM Pico32x.regs[0] &= ~P32XS_FM; Pico32x.regs[0] |= d & P32XS_FM; break; - case 0x14: - Pico32x.sh2irqs &= ~P32XI_VRES; + case 0x14/2: + Pico32x.sh2irqi[sh2->is_slave] &= ~P32XI_VRES; goto irls; - 
case 0x16: + case 0x16/2: Pico32x.sh2irqi[sh2->is_slave] &= ~P32XI_VINT; goto irls; - case 0x18: + case 0x18/2: Pico32x.sh2irqi[sh2->is_slave] &= ~P32XI_HINT; goto irls; - case 0x1a: + case 0x1a/2: Pico32x.regs[2 / 2] &= ~(1 << sh2->is_slave); p32x_update_cmd_irq(sh2, 0); return; - case 0x1c: + case 0x1c/2: p32x_pwm_sync_to_sh2(sh2); Pico32x.sh2irqi[sh2->is_slave] &= ~P32XI_PWM; p32x_pwm_schedule_sh2(sh2); goto irls; + case 0x20/2: // comm port + case 0x22/2: + case 0x24/2: + case 0x26/2: + case 0x28/2: + case 0x2a/2: + case 0x2c/2: + case 0x2e/2: + if (Pico32x.regs[a / 2] != (u16)d) { + unsigned int cycles = sh2_cycles_done_m68k(sh2); + + Pico32x.regs[a / 2] = d; + p32x_m68k_poll_event(a, P32XF_68KCPOLL); + p32x_sh2_poll_event(a, sh2->other_sh2, SH2_STATE_CPOLL, cycles); + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) + sh2_end_run(sh2, 0); + sh2_poll_write(a, d, cycles, sh2); + } + return; + case 0x30/2: // PWM + case 0x32/2: + case 0x34/2: + case 0x36/2: + case 0x38/2: + case 0x3a/2: + case 0x3c/2: + case 0x3e/2: + p32x_pwm_write16(a, d, sh2, sh2_cycles_done_m68k(sh2)); + return; } p32x_sh2reg_write8(a | 1, d, sh2); @@ -787,7 +998,7 @@ static u32 PicoRead8_32x_on(u32 a) } if ((a & 0xfc00) != 0x5000) { - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) return PicoRead8_mcd_io(a); else return PicoRead8_io(a); @@ -831,7 +1042,7 @@ static u32 PicoRead16_32x_on(u32 a) } if ((a & 0xfc00) != 0x5000) { - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) return PicoRead16_mcd_io(a); else return PicoRead16_io(a); @@ -871,12 +1082,7 @@ static void PicoWrite8_32x_on(u32 a, u32 d) } if ((a & 0xfc00) != 0x5000) { - if (PicoAHW & PAHW_MCD) - PicoWrite8_mcd_io(a, d); - else - PicoWrite8_io(a, d); - if (a == 0xa130f1) - bank_switch(Pico32x.regs[4 / 2]); + m68k_write8_io(a, d); return; } @@ -889,7 +1095,7 @@ static void PicoWrite8_32x_on(u32 a, u32 d) // TODO: verify if ((a & 0xfe00) == 0x5200) { // a15200 elprintf(EL_32X|EL_ANOMALY, "m68k 32x PAL w8 [%06x] %02x @%06x", a, 
d & 0xff, SekPc); - ((u8 *)Pico32xMem->pal)[(a & 0x1ff) ^ 1] = d; + ((u8 *)Pico32xMem->pal)[MEM_BE2(a & 0x1ff)] = d; Pico32x.dirty_pal = 1; return; } @@ -898,6 +1104,27 @@ static void PicoWrite8_32x_on(u32 a, u32 d) elprintf(EL_UIO, "m68k unmapped w8 [%06x] %02x @%06x", a, d & 0xff, SekPc); } +static void PicoWrite8_32x_on_io(u32 a, u32 d) +{ + PicoWrite8_io(a, d); + if (a == 0xa130f1) + bank_switch_rom_68k(Pico32x.regs[4 / 2]); +} + +static void PicoWrite8_32x_on_io_cd(u32 a, u32 d) +{ + PicoWrite8_mcd_io(a, d); + if (a == 0xa130f1) + bank_switch_rom_68k(Pico32x.regs[4 / 2]); +} + +static void PicoWrite8_32x_on_io_ssf2(u32 a, u32 d) +{ + carthw_ssf2_write8(a, d); + if ((a & ~0x0e) == 0xa130f1) + bank_switch_rom_68k(Pico32x.regs[4 / 2]); +} + static void PicoWrite16_32x_on(u32 a, u32 d) { if ((a & 0xfc00) == 0x5000) @@ -909,12 +1136,7 @@ static void PicoWrite16_32x_on(u32 a, u32 d) } if ((a & 0xfc00) != 0x5000) { - if (PicoAHW & PAHW_MCD) - PicoWrite16_mcd_io(a, d); - else - PicoWrite16_io(a, d); - if (a == 0xa130f0) - bank_switch(Pico32x.regs[4 / 2]); + m68k_write16_io(a, d); return; } @@ -934,19 +1156,45 @@ static void PicoWrite16_32x_on(u32 a, u32 d) elprintf(EL_UIO, "m68k unmapped w16 [%06x] %04x @%06x", a, d & 0xffff, SekPc); } +static void PicoWrite16_32x_on_io(u32 a, u32 d) +{ + PicoWrite16_io(a, d); + if (a == 0xa130f0) + bank_switch_rom_68k(Pico32x.regs[4 / 2]); +} + +static void PicoWrite16_32x_on_io_cd(u32 a, u32 d) +{ + PicoWrite16_mcd_io(a, d); + if (a == 0xa130f0) + bank_switch_rom_68k(Pico32x.regs[4 / 2]); +} + +static void PicoWrite16_32x_on_io_ssf2(u32 a, u32 d) +{ + PicoWrite16_io(a, d); + if ((a & ~0x0f) == 0xa130f0) { + carthw_ssf2_write8(a + 1, d); + bank_switch_rom_68k(Pico32x.regs[4 / 2]); + } +} + // before ADEN u32 PicoRead8_32x(u32 a) { u32 d = 0; - if ((a & 0xffc0) == 0x5100) { // a15100 - // regs are always readable - d = ((u8 *)Pico32x.regs)[(a & 0x3f) ^ 1]; - goto out; - } - if ((a & 0xfffc) == 0x30ec) { // a130ec - d = str_mars[a & 
3]; - goto out; + if (PicoIn.opt & POPT_EN_32X) { + if ((a & 0xffc0) == 0x5100) { // a15100 + // regs are always readable + d = ((u8 *)Pico32x.regs)[MEM_BE2(a & 0x3f)]; + goto out; + } + + if ((a & 0xfffc) == 0x30ec) { // a130ec + d = str_mars[a & 3]; + goto out; + } } elprintf(EL_UIO, "m68k unmapped r8 [%06x] @%06x", a, SekPc); @@ -960,14 +1208,17 @@ out: u32 PicoRead16_32x(u32 a) { u32 d = 0; - if ((a & 0xffc0) == 0x5100) { // a15100 - d = Pico32x.regs[(a & 0x3f) / 2]; - goto out; - } - if ((a & 0xfffc) == 0x30ec) { // a130ec - d = !(a & 2) ? ('M'<<8)|'A' : ('R'<<8)|'S'; - goto out; + if (PicoIn.opt & POPT_EN_32X) { + if ((a & 0xffc0) == 0x5100) { // a15100 + d = Pico32x.regs[(a & 0x3f) / 2]; + goto out; + } + + if ((a & 0xfffc) == 0x30ec) { // a130ec + d = !(a & 2) ? ('M'<<8)|'A' : ('R'<<8)|'S'; + goto out; + } } elprintf(EL_UIO, "m68k unmapped r16 [%06x] @%06x", a, SekPc); @@ -980,27 +1231,49 @@ out: void PicoWrite8_32x(u32 a, u32 d) { - if ((a & 0xffc0) == 0x5100) { // a15100 + if ((PicoIn.opt & POPT_EN_32X) && (a & 0xffc0) == 0x5100) // a15100 + { u16 *r = Pico32x.regs; + u8 *r8 = (u8 *)r; elprintf(EL_32X, "m68k 32x w8 [%06x] %02x @%06x", a, d & 0xff, SekPc); a &= 0x3f; - if (a == 1) { - if ((d ^ r[0]) & d & P32XS_ADEN) { - Pico32xStartup(); - r[0] &= ~P32XS_nRES; // causes reset if specified by this write - r[0] |= P32XS_ADEN; - p32x_reg_write8(a, d); // forward for reset processing - } - return; + switch (a) { + case 0x00: + r8[MEM_BE2(a)] = d & (P32XS_FM>>8); + return; + case 0x01: + if ((d ^ r[0]) & d & P32XS_ADEN) { + Pico32xStartup(); + r[0] &= ~P32XS_nRES; // causes reset if specified by this write + r[0] |= P32XS_ADEN; + p32x_reg_write8(a, d); // forward for reset processing + } + else { + r[0] &= ~(P32XS_nRES|P32XS_ADEN); + r[0] |= d & (P32XS_nRES|P32XS_ADEN); + } + return; + case 0x03: r8[MEM_BE2(a)] = d & 3; return; + case 0x05: r8[MEM_BE2(a)] = d & 3; return; + case 0x07: r8[MEM_BE2(a)] = d & 7; return; + case 0x09: r8[MEM_BE2(a)] = d ; return; + 
case 0x0a: r8[MEM_BE2(a)] = d ; return; + case 0x0b: r8[MEM_BE2(a)] = d & 0xfe; return; + case 0x0d: r8[MEM_BE2(a)] = d ; return; + case 0x0e: r8[MEM_BE2(a)] = d ; return; + case 0x0f: r8[MEM_BE2(a)] = d ; return; + case 0x10: r8[MEM_BE2(a)] = d ; return; + case 0x11: r8[MEM_BE2(a)] = d & 0xfc; return; + case 0x1a: r8[MEM_BE2(a)] = d & 1; return; + case 0x1b: r8[MEM_BE2(a)] = d & 1; return; + case 0x20: case 0x21: case 0x22: case 0x23: // COMM + case 0x24: case 0x25: case 0x26: case 0x27: + case 0x28: case 0x29: case 0x2a: case 0x2b: + case 0x2c: case 0x2d: case 0x2e: case 0x2f: + r8[MEM_BE2(a)] = d; + return; } - - // allow only COMM for now - if ((a & 0x30) == 0x20) { - u8 *r8 = (u8 *)r; - r8[a ^ 1] = d; - } - return; } elprintf(EL_UIO, "m68k unmapped w8 [%06x] %02x @%06x", a, d & 0xff, SekPc); @@ -1008,66 +1281,83 @@ void PicoWrite8_32x(u32 a, u32 d) void PicoWrite16_32x(u32 a, u32 d) { - if ((a & 0xffc0) == 0x5100) { // a15100 + if ((PicoIn.opt & POPT_EN_32X) && (a & 0xffc0) == 0x5100) // a15100 + { u16 *r = Pico32x.regs; - elprintf(EL_UIO, "m68k 32x w16 [%06x] %04x @%06x", a, d & 0xffff, SekPc); + elprintf(EL_32X, "m68k 32x w16 [%06x] %04x @%06x", a, d & 0xffff, SekPc); a &= 0x3e; - if (a == 0) { - if ((d ^ r[0]) & d & P32XS_ADEN) { - Pico32xStartup(); - r[0] &= ~P32XS_nRES; // causes reset if specified by this write - r[0] |= P32XS_ADEN; - p32x_reg_write16(a, d); // forward for reset processing - } - return; + switch (a) { + case 0x00: + if ((d ^ r[0]) & d & P32XS_ADEN) { + Pico32xStartup(); + r[0] &= ~(P32XS_FM|P32XS_nRES|P32XS_ADEN); + // causes reset if specified by this write + r[0] |= d & (P32XS_FM|P32XS_ADEN); + p32x_reg_write16(a, d); // forward for reset processing + } + else { + r[0] &= ~(P32XS_FM|P32XS_nRES|P32XS_ADEN); + r[0] |= d & (P32XS_FM|P32XS_nRES|P32XS_ADEN); + } + return; + case 0x02: r[a / 2] = d & 3; return; + case 0x04: r[a / 2] = d & 3; return; + case 0x06: r[a / 2] = d & 7; return; + case 0x08: r[a / 2] = d & 0x00ff; return; + case 
0x0a: r[a / 2] = d & 0xfffe; return; + case 0x0c: r[a / 2] = d & 0x00ff; return; + case 0x0e: r[a / 2] = d ; return; + case 0x10: r[a / 2] = d & 0xfffc; return; + case 0x1a: r[a / 2] = d & 0x0101; return; + case 0x20: case 0x22: // COMM + case 0x24: case 0x26: + case 0x28: case 0x2a: + case 0x2c: case 0x2e: + r[a / 2] = d; + return; } - - // allow only COMM for now - if ((a & 0x30) == 0x20) - r[a / 2] = d; - return; } elprintf(EL_UIO, "m68k unmapped w16 [%06x] %04x @%06x", a, d & 0xffff, SekPc); } /* quirk: in both normal and overwrite areas only nonzero values go through */ -#define sh2_write8_dramN(n) \ +#define sh2_write8_dramN(p, a, d) \ if ((d & 0xff) != 0) { \ - u8 *dram = (u8 *)Pico32xMem->dram[n]; \ - dram[(a & 0x1ffff) ^ 1] = d; \ + u8 *dram = (u8 *)p; \ + dram[MEM_BE2(a & 0x1ffff)] = d; \ } static void m68k_write8_dram0_ow(u32 a, u32 d) { - sh2_write8_dramN(0); + sh2_write8_dramN(Pico32xMem->dram[0], a, d); } static void m68k_write8_dram1_ow(u32 a, u32 d) { - sh2_write8_dramN(1); + sh2_write8_dramN(Pico32xMem->dram[1], a, d); } -#define sh2_write16_dramN(n) \ - u16 *pd = &Pico32xMem->dram[n][(a & 0x1ffff) / 2]; \ +#define sh2_write16_dramN(p, a, d) \ + u16 *pd = &((u16 *)p)[(a & 0x1ffff) / 2]; \ if (!(a & 0x20000)) { \ *pd = d; \ - return; \ - } \ - /* overwrite */ \ - if (!(d & 0xff00)) d |= *pd & 0xff00; \ - if (!(d & 0x00ff)) d |= *pd & 0x00ff; \ - *pd = d; + } else { \ + u16 v = *pd; /* overwrite */ \ + if (!(d & 0x00ff)) d |= v & 0x00ff; \ + if (!(d & 0xff00)) d |= v & 0xff00; \ + *pd = d; \ + } static void m68k_write16_dram0_ow(u32 a, u32 d) { - sh2_write16_dramN(0); + sh2_write16_dramN(Pico32xMem->dram[0], a, d); } static void m68k_write16_dram1_ow(u32 a, u32 d) { - sh2_write16_dramN(1); + sh2_write16_dramN(Pico32xMem->dram[1], a, d); } // ----------------------------------------------------------------- @@ -1076,7 +1366,7 @@ static void m68k_write16_dram1_ow(u32 a, u32 d) static void PicoWrite8_hint(u32 a, u32 d) { if ((a & 0xfffc) == 0x0070) { - 
Pico32xMem->m68k_rom[a ^ 1] = d; + Pico32xMem->m68k_rom[MEM_BE2(a)] = d; return; } @@ -1147,19 +1437,18 @@ static void PicoWrite16_bank(u32 a, u32 d) static void bank_map_handler(void) { - cpu68k_map_set(m68k_read8_map, 0x900000, 0x9fffff, PicoRead8_bank, 1); - cpu68k_map_set(m68k_read16_map, 0x900000, 0x9fffff, PicoRead16_bank, 1); + cpu68k_map_read_funcs(0x900000, 0x9fffff, PicoRead8_bank, PicoRead16_bank, 0); } -static void bank_switch(int b) +static void bank_switch_rom_68k(int b) { - unsigned int rs, bank; + unsigned int rs, bank, bank2; if (Pico.m.ncart_in) return; bank = b << 20; - if ((Pico.m.sram_reg & SRR_MAPPED) && bank == SRam.start) { + if ((Pico.m.sram_reg & SRR_MAPPED) && bank == Pico.sv.start) { bank_map_handler(); return; } @@ -1170,21 +1459,22 @@ static void bank_switch(int b) return; } - // 32X ROM (unbanked, XXX: consider mirroring?) + // 32X ROM (XXX: consider mirroring?) rs = (Pico.romsize + M68K_BANK_MASK) & ~M68K_BANK_MASK; - rs -= bank; - if (rs > 0x100000) - rs = 0x100000; - cpu68k_map_set(m68k_read8_map, 0x900000, 0x900000 + rs - 1, Pico.rom + bank, 0); - cpu68k_map_set(m68k_read16_map, 0x900000, 0x900000 + rs - 1, Pico.rom + bank, 0); - - elprintf(EL_32X, "bank %06x-%06x -> %06x", 0x900000, 0x900000 + rs - 1, bank); - -#ifdef EMU_F68K - // setup FAME fetchmap - for (rs = 0x90; rs < 0xa0; rs++) - PicoCpuFM68k.Fetch[rs] = (unsigned long)Pico.rom + bank - 0x900000; -#endif + if (!carthw_ssf2_active) { + rs -= bank; + if (rs > 0x100000) + rs = 0x100000; + cpu68k_map_read_mem(0x900000, 0x900000 + rs - 1, Pico.rom + bank, 0); + elprintf(EL_32X, "bank %06x-%06x -> %06x", 0x900000, 0x900000 + rs - 1, bank); + } + else { + bank = bank >> 19; + bank2 = carthw_ssf2_banks[bank + 0] << 19; + cpu68k_map_read_mem(0x900000, 0x97ffff, Pico.rom + bank2, 0); + bank2 = carthw_ssf2_banks[bank + 1] << 19; + cpu68k_map_read_mem(0x980000, 0x9fffff, Pico.rom + bank2, 0); + } } // ----------------------------------------------------------------- @@ -1192,20 
+1482,21 @@ static void bank_switch(int b) // ----------------------------------------------------------------- // read8 -static u32 sh2_read8_unmapped(u32 a, SH2 *sh2) +static REGPARM(2) u32 sh2_read8_unmapped(u32 a, SH2 *sh2) { elprintf_sh2(sh2, EL_32X, "unmapped r8 [%08x] %02x @%06x", a, 0, sh2_pc(sh2)); return 0; } -static u32 sh2_read8_cs0(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read8_cs0(u32 a, SH2 *sh2) { u32 d = 0; + DRC_SAVE_SR(sh2); sh2_burn_cycles(sh2, 1*2); - // 0x3ffc0 is veridied + // 0x3ffc0 is verified if ((a & 0x3ffc0) == 0x4000) { d = p32x_sh2reg_read16(a, sh2); goto out_16to8; @@ -1213,22 +1504,23 @@ static u32 sh2_read8_cs0(u32 a, SH2 *sh2) if ((a & 0x3fff0) == 0x4100) { d = p32x_vdp_read16(a); - sh2_poll_detect(sh2, a, SH2_STATE_VPOLL, 7); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); goto out_16to8; } - // TODO: mirroring? - if (!sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_m)) - return Pico32xMem->sh2_rom_m.b[a ^ 1]; - if (sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_s)) - return Pico32xMem->sh2_rom_s.b[a ^ 1]; - if ((a & 0x3fe00) == 0x4200) { d = Pico32xMem->pal[(a & 0x1ff) / 2]; goto out_16to8; } - return sh2_read8_unmapped(a, sh2); + // TODO: mirroring? 
+ if (!sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_m)) + d = Pico32xMem->sh2_rom_m.b[MEM_BE2(a)]; + else if (sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_s)) + d = Pico32xMem->sh2_rom_s.b[MEM_BE2(a)]; + else + d = sh2_read8_unmapped(a, sh2); + goto out; out_16to8: if (a & 1) @@ -1236,67 +1528,138 @@ out_16to8: else d >>= 8; +out: elprintf_sh2(sh2, EL_32X, "r8 [%08x] %02x @%06x", a, d, sh2_pc(sh2)); - return d; + DRC_RESTORE_SR(sh2); + return (s8)d; } -static u32 sh2_read8_da(u32 a, SH2 *sh2) +// for ssf2 +static u32 REGPARM(2) sh2_read8_rom(u32 a, SH2 *sh2) { - return sh2->data_array[(a & 0xfff) ^ 1]; + u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; + s8 *p = sh2->p_rom; + return p[MEM_BE2(bank + (a & 0x7ffff))]; } // read16 -static u32 sh2_read16_unmapped(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read16_unmapped(u32 a, SH2 *sh2) { elprintf_sh2(sh2, EL_32X, "unmapped r16 [%08x] %04x @%06x", a, 0, sh2_pc(sh2)); return 0; } -static u32 sh2_read16_cs0(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read16_cs0(u32 a, SH2 *sh2) { u32 d = 0; + DRC_SAVE_SR(sh2); sh2_burn_cycles(sh2, 1*2); if ((a & 0x3ffc0) == 0x4000) { d = p32x_sh2reg_read16(a, sh2); if (!(EL_LOGMASK & EL_PWM) && (a & 0x30) == 0x30) // hide PWM - return d; + goto out_noprint; goto out; } if ((a & 0x3fff0) == 0x4100) { d = p32x_vdp_read16(a); - sh2_poll_detect(sh2, a, SH2_STATE_VPOLL, 7); + p32x_sh2_poll_detect(a, sh2, SH2_STATE_VPOLL, 9); goto out; } - if (!sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_m)) - return Pico32xMem->sh2_rom_m.w[a / 2]; - if (sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_s)) - return Pico32xMem->sh2_rom_s.w[a / 2]; - if ((a & 0x3fe00) == 0x4200) { d = Pico32xMem->pal[(a & 0x1ff) / 2]; goto out; } - return sh2_read16_unmapped(a, sh2); + if (!sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_m)) + d = Pico32xMem->sh2_rom_m.w[a / 2]; + else if (sh2->is_slave && a < sizeof(Pico32xMem->sh2_rom_s)) + d = Pico32xMem->sh2_rom_s.w[a / 2]; + else + d = sh2_read16_unmapped(a, 
sh2); out: elprintf_sh2(sh2, EL_32X, "r16 [%08x] %04x @%06x", a, d, sh2_pc(sh2)); - return d; +out_noprint: + DRC_RESTORE_SR(sh2); + return (s16)d; } -static u32 sh2_read16_da(u32 a, SH2 *sh2) +static u32 REGPARM(2) sh2_read16_rom(u32 a, SH2 *sh2) { - return ((u16 *)sh2->data_array)[(a & 0xfff) / 2]; + u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; + s16 *p = sh2->p_rom; + return p[(bank + (a & 0x7fffe)) / 2]; +} + +static u32 REGPARM(2) sh2_read32_unmapped(u32 a, SH2 *sh2) +{ + elprintf_sh2(sh2, EL_32X, "unmapped r32 [%08x] %08x @%06x", + a, 0, sh2_pc(sh2)); + return 0; +} + +static u32 REGPARM(2) sh2_read32_cs0(u32 a, SH2 *sh2) +{ + u32 d1 = sh2_read16_cs0(a, sh2) << 16, d2 = sh2_read16_cs0(a + 2, sh2) << 16; + return d1 | (d2 >> 16); +} + +static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) +{ + u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; + u32 *p = sh2->p_rom; + u32 d = p[(bank + (a & 0x7fffc)) / 4]; + return CPU_BE2(d); } // writes +#ifdef DRC_SH2 +static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) +{ + unsigned cycles; + + DRC_SAVE_SR(sh2); + cycles = sh2_cycles_done_m68k(sh2); + sh2_poll_write(a, d, cycles, sh2); + p32x_sh2_poll_event(a, sh2->other_sh2, SH2_STATE_RPOLL, cycles); + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) + sh2_end_run(sh2, 0); + DRC_RESTORE_SR(sh2); +} + +void NOINLINE sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, u32 t) +{ + if (t & 0x80) sh2_sdram_poll(a, d, sh2); + if (t & 0x7f) sh2_drc_wcheck_ram(a, 2, sh2); +} + +void NOINLINE sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, u32 t) +{ + if (t & 0x000080) sh2_sdram_poll(a, d>>16, sh2); + if (t & 0x800000) sh2_sdram_poll(a+2, d, sh2); + if (t & ~0x800080) sh2_drc_wcheck_ram(a, 4, sh2); +} + +#ifndef _ASM_32X_MEMORY_C +static void sh2_da_checks(u32 a, u32 t, SH2 *sh2) +{ + sh2_drc_wcheck_da(a, 2, sh2); +} + +static void sh2_da_checks_l(u32 a, u32 t, SH2 *sh2) +{ + sh2_drc_wcheck_da(a, 4, sh2); +} +#endif +#endif + static void REGPARM(3) sh2_write_ignore(u32 a, u32 d, SH2 *sh2) { } 
@@ -1310,66 +1673,69 @@ static void REGPARM(3) sh2_write8_unmapped(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write8_cs0(u32 a, u32 d, SH2 *sh2) { + DRC_SAVE_SR(sh2); elprintf_sh2(sh2, EL_32X, "w8 [%08x] %02x @%06x", a, d & 0xff, sh2_pc(sh2)); + if ((a & 0x3ffc0) == 0x4000) { + p32x_sh2reg_write8(a, d, sh2); + goto out; + } + if (Pico32x.regs[0] & P32XS_FM) { if ((a & 0x3fff0) == 0x4100) { - sh2->poll_addr = 0; + sh2->poll_cnt = 0; p32x_vdp_write8(a, d); - return; + goto out; + } + + if ((a & 0x3fe00) == 0x4200) { + sh2->poll_cnt = 0; + ((u8 *)Pico32xMem->pal)[MEM_BE2(a & 0x1ff)] = d; + Pico32x.dirty_pal = 1; + goto out; } } - if ((a & 0x3ffc0) == 0x4000) { - p32x_sh2reg_write8(a, d, sh2); - return; - } - sh2_write8_unmapped(a, d, sh2); +out: + DRC_RESTORE_SR(sh2); } -static void REGPARM(3) sh2_write8_dram0(u32 a, u32 d, SH2 *sh2) +#ifdef _ASM_32X_MEMORY_C +extern void REGPARM(3) sh2_write8_dram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2); +#else +static void REGPARM(3) sh2_write8_dram(u32 a, u32 d, SH2 *sh2) { - sh2_write8_dramN(0); -} - -static void REGPARM(3) sh2_write8_dram1(u32 a, u32 d, SH2 *sh2) -{ - sh2_write8_dramN(1); + sh2_write8_dramN(sh2->p_dram, a, d); } static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0x3ffff; + u32 a1 = MEM_BE2(a & 0x3ffff); + ((u8 *)sh2->p_sdram)[a1] = d; #ifdef DRC_SH2 - int t = Pico32xMem->drcblk_ram[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u8 *p = sh2->p_drcblk_ram; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_drc_wcheck_ram(a, t, sh2->is_slave); + sh2_sdram_checks(a & ~1, ((u16 *)sh2->p_sdram)[a1 / 2], sh2, t); #endif - Pico32xMem->sdram[a1 ^ 1] = d; -} - -static void REGPARM(3) sh2_write8_sdram_wt(u32 a, u32 d, SH2 *sh2) -{ - // xmen sync hack.. 
- if (a < 0x26000200) - sh2_end_run(sh2, 32); - - sh2_write8_sdram(a, d, sh2); } static void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0xfff; + u32 a1 = MEM_BE2(a & 0xfff); + sh2->data_array[a1] = d; #ifdef DRC_SH2 - int id = sh2->is_slave; - int t = Pico32xMem->drcblk_da[id][a1 >> SH2_DRCBLK_DA_SHIFT]; + u8 *p = sh2->p_drcblk_da; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) - sh2_drc_wcheck_da(a, t, id); + sh2_da_checks(a, t, sh2); #endif - sh2->data_array[a1 ^ 1] = d; } +#endif // write16 static void REGPARM(3) sh2_write16_unmapped(u32 a, u32 d, SH2 *sh2) @@ -1380,67 +1746,155 @@ static void REGPARM(3) sh2_write16_unmapped(u32 a, u32 d, SH2 *sh2) static void REGPARM(3) sh2_write16_cs0(u32 a, u32 d, SH2 *sh2) { + DRC_SAVE_SR(sh2); if (((EL_LOGMASK & EL_PWM) || (a & 0x30) != 0x30)) // hide PWM elprintf_sh2(sh2, EL_32X, "w16 [%08x] %04x @%06x", a, d & 0xffff, sh2_pc(sh2)); + if ((a & 0x3ffc0) == 0x4000) { + p32x_sh2reg_write16(a, d, sh2); + goto out; + } + if (Pico32x.regs[0] & P32XS_FM) { if ((a & 0x3fff0) == 0x4100) { - sh2->poll_addr = 0; + sh2->poll_cnt = 0; p32x_vdp_write16(a, d, sh2); - return; + goto out; } if ((a & 0x3fe00) == 0x4200) { + sh2->poll_cnt = 0; Pico32xMem->pal[(a & 0x1ff) / 2] = d; Pico32x.dirty_pal = 1; - return; + goto out; } } - if ((a & 0x3ffc0) == 0x4000) { - p32x_sh2reg_write16(a, d, sh2); - return; - } - sh2_write16_unmapped(a, d, sh2); +out: + DRC_RESTORE_SR(sh2); } -static void REGPARM(3) sh2_write16_dram0(u32 a, u32 d, SH2 *sh2) +#ifdef _ASM_32X_MEMORY_C +extern void REGPARM(3) sh2_write16_dram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write16_sdram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2); +#else +static void REGPARM(3) sh2_write16_dram(u32 a, u32 d, SH2 *sh2) { - sh2_write16_dramN(0); -} - -static void REGPARM(3) sh2_write16_dram1(u32 a, u32 d, SH2 *sh2) -{ - sh2_write16_dramN(1); + sh2_write16_dramN(sh2->p_dram, a, d); } static void REGPARM(3) 
sh2_write16_sdram(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0x3ffff; + u32 a1 = a & 0x3fffe; + ((u16 *)sh2->p_sdram)[a1 / 2] = d; #ifdef DRC_SH2 - int t = Pico32xMem->drcblk_ram[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u8 *p = sh2->p_drcblk_ram; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) - sh2_drc_wcheck_ram(a, t, sh2->is_slave); + sh2_sdram_checks(a, d, sh2, t); #endif - ((u16 *)Pico32xMem->sdram)[a1 / 2] = d; } static void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2) { - u32 a1 = a & 0xfff; -#ifdef DRC_SH2 - int id = sh2->is_slave; - int t = Pico32xMem->drcblk_da[id][a1 >> SH2_DRCBLK_DA_SHIFT]; - if (t) - sh2_drc_wcheck_da(a, t, id); -#endif + u32 a1 = a & 0xffe; ((u16 *)sh2->data_array)[a1 / 2] = d; +#ifdef DRC_SH2 + u8 *p = sh2->p_drcblk_da; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + if (t) + sh2_da_checks(a, t, sh2); +#endif +} +#endif + +static void REGPARM(3) sh2_write16_rom(u32 a, u32 d, SH2 *sh2) +{ + u32 a1 = a & 0x3ffffe; + // tweak for WWF Raw: does writes to ROM area, and it doesn't work without + // allowing this. + // Presumably the write goes to the CPU cache and is read back from there, + // but it would be extremely costly to emulate cache behaviour. Just allow + // writes to that region, hoping that the original ROM values are never used. 
+ if ((a1 & 0x3e0000) == 0x3e0000 && (PicoIn.quirks & PQUIRK_WWFRAW_HACK)) + ((u16 *)sh2->p_rom)[a1 / 2] = d; + else + sh2_write16_unmapped(a, d, sh2); } +static void REGPARM(3) sh2_write32_unmapped(u32 a, u32 d, SH2 *sh2) +{ + elprintf_sh2(sh2, EL_32X, "unmapped w32 [%08x] %08x @%06x", + a, d, sh2_pc(sh2)); +} -typedef u32 (sh2_read_handler)(u32 a, SH2 *sh2); +static void REGPARM(3) sh2_write32_cs0(u32 a, u32 d, SH2 *sh2) +{ + sh2_write16_cs0(a, d >> 16, sh2); + sh2_write16_cs0(a + 2, d, sh2); +} + +#define sh2_write32_dramN(p, a, d) \ + u32 *pd = &((u32 *)p)[(a & 0x1ffff) / 4]; \ + if (!(a & 0x20000)) { \ + *pd = CPU_BE2(d); \ + } else { \ + /* overwrite */ \ + u32 v = *pd, m = 0; d = CPU_BE2(d); \ + if (!(d & 0x000000ff)) m |= 0x000000ff; \ + if (!(d & 0x0000ff00)) m |= 0x0000ff00; \ + if (!(d & 0x00ff0000)) m |= 0x00ff0000; \ + if (!(d & 0xff000000)) m |= 0xff000000; \ + *pd = d | (v&m); \ + } + +#ifdef _ASM_32X_MEMORY_C +extern void REGPARM(3) sh2_write32_dram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2); +extern void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2); +#else +static void REGPARM(3) sh2_write32_dram(u32 a, u32 d, SH2 *sh2) +{ + sh2_write32_dramN(sh2->p_dram, a, d); +} + +static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) +{ + u32 a1 = a & 0x3fffc; + *(u32 *)((char*)sh2->p_sdram + a1) = CPU_BE2(d); +#ifdef DRC_SH2 + u8 *p = sh2->p_drcblk_ram; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u32 u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; + if (t|(u<<16)) + sh2_sdram_checks_l(a, d, sh2, t|(u<<16)); +#endif +} + +static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) +{ + u32 a1 = a & 0xffc; + *((u32 *)sh2->data_array + a1/4) = CPU_BE2(d); +#ifdef DRC_SH2 + u8 *p = sh2->p_drcblk_da; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + u32 u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; + if (t|(u<<16)) + sh2_da_checks_l(a, t|(u<<16), sh2); +#endif +} +#endif + +static void REGPARM(3) sh2_write32_rom(u32 a, u32 d, 
SH2 *sh2) +{ + sh2_write16_rom(a, d >> 16, sh2); + sh2_write16_rom(a + 2, d, sh2); +} + +typedef u32 REGPARM(2) (sh2_read_handler)(u32 a, SH2 *sh2); typedef void REGPARM(3) (sh2_write_handler)(u32 a, u32 d, SH2 *sh2); #define SH2MAP_ADDR2OFFS_R(a) \ @@ -1456,10 +1910,10 @@ u32 REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2) sh2_map += SH2MAP_ADDR2OFFS_R(a); p = sh2_map->addr; - if (map_flag_set(p)) - return ((sh2_read_handler *)(p << 1))(a, sh2); + if (!map_flag_set(p)) + return *(s8 *)((p << 1) + MEM_BE2(a & sh2_map->mask)); else - return *(u8 *)((p << 1) + ((a & sh2_map->mask) ^ 1)); + return ((sh2_read_handler *)(p << 1))(a, sh2); } u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) @@ -1469,33 +1923,24 @@ u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) sh2_map += SH2MAP_ADDR2OFFS_R(a); p = sh2_map->addr; - if (map_flag_set(p)) - return ((sh2_read_handler *)(p << 1))(a, sh2); + if (!map_flag_set(p)) + return *(s16 *)((p << 1) + (a & sh2_map->mask)); else - return *(u16 *)((p << 1) + ((a & sh2_map->mask) & ~1)); + return ((sh2_read_handler *)(p << 1))(a, sh2); } u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2) { - const sh2_memmap *sh2_map = sh2->read16_map; - sh2_read_handler *handler; - u32 offs; + const sh2_memmap *sh2_map = sh2->read32_map; uptr p; - offs = SH2MAP_ADDR2OFFS_R(a); - sh2_map += offs; + sh2_map += SH2MAP_ADDR2OFFS_R(a); p = sh2_map->addr; if (!map_flag_set(p)) { - // XXX: maybe 32bit access instead with ror? 
- u16 *pd = (u16 *)((p << 1) + ((a & sh2_map->mask) & ~1)); - return (pd[0] << 16) | pd[1]; - } - - if (offs == SH2MAP_ADDR2OFFS_R(0xffffc000)) - return sh2_peripheral_read32(a, sh2); - - handler = (sh2_read_handler *)(p << 1); - return (handler(a, sh2) << 16) | handler(a + 2, sh2); + u32 *pd = (u32 *)((p << 1) + (a & sh2_map->mask)); + return CPU_BE2(*pd); + } else + return ((sh2_read_handler *)(p << 1))(a, sh2); } void REGPARM(3) p32x_sh2_write8(u32 a, u32 d, SH2 *sh2) @@ -1518,27 +1963,121 @@ void REGPARM(3) p32x_sh2_write16(u32 a, u32 d, SH2 *sh2) void REGPARM(3) p32x_sh2_write32(u32 a, u32 d, SH2 *sh2) { - const void **sh2_wmap = sh2->write16_tab; + const void **sh2_wmap = sh2->write32_tab; sh2_write_handler *wh; - u32 offs; - offs = SH2MAP_ADDR2OFFS_W(a); + wh = sh2_wmap[SH2MAP_ADDR2OFFS_W(a)]; + wh(a, d, sh2); +} - if (offs == SH2MAP_ADDR2OFFS_W(0xffffc000)) { - sh2_peripheral_write32(a, d, sh2); - return; +void *p32x_sh2_get_mem_ptr(u32 a, u32 *mask, SH2 *sh2) +{ + const sh2_memmap *mm = sh2->read8_map; + void *ret = (void *)-1; + + mm += SH2MAP_ADDR2OFFS_R(a); + if (!map_flag_set(mm->addr)) { + // directly mapped memory (SDRAM, ROM, data array) + ret = (void *)(mm->addr << 1); + *mask = mm->mask; + } else if ((a & ~0x7ff) == 0) { + // BIOS, has handler function since it shares its segment with I/O + ret = sh2->p_bios; + *mask = 0x7ff; + } else if ((a & 0xc6000000) == 0x02000000) { + // banked ROM. 
Return bank address + u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; + ret = (char*)sh2->p_rom + bank; + *mask = 0x07ffff; } - wh = sh2_wmap[offs]; - wh(a, d >> 16, sh2); - wh(a + 2, d, sh2); + return ret; +} + +int p32x_sh2_mem_is_rom(u32 a, SH2 *sh2) +{ + if ((a & 0xc6000000) == 0x02000000) { + // ROM, but mind tweak for WWF Raw + return !(PicoIn.quirks & PQUIRK_WWFRAW_HACK) || (a & 0x3f0000) < 0x3e0000; + } + + return 0; +} + +int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2) +{ + u32 mask; + u8 *ps, *pd; + int len, i; + + // check if src and dst points to memory (rom/sdram/dram/da) + if ((pd = p32x_sh2_get_mem_ptr(dst, &mask, sh2)) == (void *)-1) + return 0; + if ((ps = p32x_sh2_get_mem_ptr(src, &mask, sh2)) == (void *)-1) + return 0; + ps += src & mask; + len = count * size; + + // DRAM in byte access is always in overwrite mode + if (pd == sh2->p_dram && size == 1) + dst |= 0x20000; + + // align dst to halfword + if (dst & 1) { + p32x_sh2_write8(dst, *(u8 *)MEM_BE2((uptr)ps), sh2); + ps++, dst++, len --; + } + + // copy data + if ((uptr)ps & 1) { + // unaligned, use halfword copy mode to reduce memory bandwidth + u16 *sp = (u16 *)(ps - 1); + u16 dl, dh = *sp++; + for (i = 0; i < (len & ~1); i += 2, dst += 2, sp++) { + dl = dh, dh = *sp; + p32x_sh2_write16(dst, (dh >> 8) | (dl << 8), sh2); + } + if (len & 1) + p32x_sh2_write8(dst, dh, sh2); + } else { + // dst and src at least halfword aligned + u16 *sp = (u16 *)ps; + // align dst to word + if ((dst & 2) && len >= 2) { + p32x_sh2_write16(dst, *sp++, sh2); + dst += 2, len -= 2; + } + if ((uptr)sp & 2) { + // halfword copy, using word writes to reduce memory bandwidth + u16 dl, dh; + for (i = 0; i < (len & ~3); i += 4, dst += 4, sp += 2) { + dl = sp[0], dh = sp[1]; + p32x_sh2_write32(dst, (dl << 16) | dh, sh2); + } + } else { + // word copy + u32 d; + for (i = 0; i < (len & ~3); i += 4, dst += 4, sp += 2) { + d = *(u32 *)sp; + p32x_sh2_write32(dst, CPU_BE2(d), sh2); + } + } + if (len & 2) 
{ + p32x_sh2_write16(dst, *sp++, sh2); + dst += 2; + } + if (len & 1) + p32x_sh2_write8(dst, *sp >> 8, sh2); + } + + return count; } // ----------------------------------------------------------------- -static void z80_md_bank_write_32x(unsigned int a, unsigned char d) +static void z80_md_bank_write_32x(u32 a, unsigned char d) { - unsigned int addr68k; + u32 addr68k; addr68k = Pico.m.z80_bank68k << 15; addr68k += a & 0x7fff; @@ -1643,7 +2182,6 @@ static const u16 ssh2_code[] = { 0x2400, 0x0018, // 23c _start_cd }; -#define HWSWAP(x) (((u16)(x) << 16) | ((x) >> 16)) static void get_bios(void) { u16 *ps; @@ -1656,23 +2194,37 @@ static void get_bios(void) Byteswap(Pico32xMem->m68k_rom, p32x_bios_g, sizeof(Pico32xMem->m68k_rom)); } else { + static const u16 andb[] = { 0x0239, 0x00fe, 0x00a1, 0x5107 }; + static const u16 p_d4[] = { + 0x48e7, 0x8040, // movem.l d0/a1, -(sp) + 0x227c, 0x00a1, 0x30f1, // movea.l #0xa130f1, a1 + 0x7007, // moveq.l #7, d0 + 0x12d8, //0: move.b (a0)+, (a1)+ + 0x5289, // addq.l #1, a1 + 0x51c8, 0xfffa, // dbra d0, 0b + 0x0239, 0x00fe, 0x00a1, // and.b #0xfe, (0xa15107).l + 0x5107, + 0x4cdf, 0x0201 // movem.l (sp)+, d0/a1 + }; + // generate 68k ROM ps = (u16 *)Pico32xMem->m68k_rom; pl = (u32 *)ps; for (i = 1; i < 0xc0/4; i++) - pl[i] = HWSWAP(0x880200 + (i - 1) * 6); + pl[i] = CPU_BE2(0x880200 + (i - 1) * 6); + pl[0x70/4] = 0; // fill with nops for (i = 0xc0/2; i < 0x100/2; i++) ps[i] = 0x4e71; -#if 0 - ps[0xc0/2] = 0x46fc; - ps[0xc2/2] = 0x2700; // move #0x2700,sr - ps[0xfe/2] = 0x60fe; // jump to self -#else + // c0: don't need to care about RV - not emulated + ps[0xc8/2] = 0x1280; // move.b d0, (a1) + memcpy(ps + 0xca/2, andb, sizeof(andb)); // and.b #0xfe, (a15107) + ps[0xd2/2] = 0x4e75; // rts + // d4: + memcpy(ps + 0xd4/2, p_d4, sizeof(p_d4)); ps[0xfe/2] = 0x4e75; // rts -#endif } // fill remaining m68k_rom page with game ROM memcpy(Pico32xMem->m68k_rom_bank + sizeof(Pico32xMem->m68k_rom), @@ -1686,18 +2238,27 @@ static void 
get_bios(void) } else { pl = (u32 *)&Pico32xMem->sh2_rom_m; + ps = (u16 *)pl; // fill exception vector table to our trap address - for (i = 0; i < 128; i++) - pl[i] = HWSWAP(0x200); + for (i = 0; i < 80; i++) + pl[i] = CPU_BE2(0x200); + // CD titles by Digital Pictures jump to 0x140 for resetting ... + for (i = 0x140/2; i < 0x1fc/2; i++) + ps[i] = 0x0009; // nop // ... so fill the remainder with nops + ps[i++] = 0xa002; // bra 0x204 // ... and jump over the trap + ps[i++] = 0x0009; // nop // start - pl[0] = pl[2] = HWSWAP(0x204); + pl[0] = pl[2] = CPU_BE2(0x204); // reset SP - pl[1] = pl[3] = HWSWAP(0x6040000); + pl[1] = pl[3] = CPU_BE2(0x6040000); // startup code memcpy(&Pico32xMem->sh2_rom_m.b[0x200], msh2_code, sizeof(msh2_code)); + if (!Pico.m.ncart_in && (PicoIn.AHW & PAHW_MCD)) + // hack for MSU games (adjust delay loop for copying the MSU code to sub) + Pico32xMem->sh2_rom_m.w[0x224/2] = 0x0090; } // SSH2 @@ -1710,12 +2271,12 @@ static void get_bios(void) // fill exception vector table to our trap address for (i = 0; i < 128; i++) - pl[i] = HWSWAP(0x200); + pl[i] = CPU_BE2(0x200); // start - pl[0] = pl[2] = HWSWAP(0x204); + pl[0] = pl[2] = CPU_BE2(0x204); // reset SP - pl[1] = pl[3] = HWSWAP(0x603f800); + pl[1] = pl[3] = CPU_BE2(0x603f800); // startup code memcpy(&Pico32xMem->sh2_rom_s.b[0x200], ssh2_code, sizeof(ssh2_code)); @@ -1725,27 +2286,52 @@ static void get_bios(void) #define MAP_MEMORY(m) ((uptr)(m) >> 1) #define MAP_HANDLER(h) ( ((uptr)(h) >> 1) | ((uptr)1 << (sizeof(uptr) * 8 - 1)) ) -static sh2_memmap sh2_read8_map[0x80], sh2_read16_map[0x80]; +static sh2_memmap msh2_read8_map[0x80], msh2_read16_map[0x80], msh2_read32_map[0x80]; +static sh2_memmap ssh2_read8_map[0x80], ssh2_read16_map[0x80], ssh2_read32_map[0x80]; // for writes we are using handlers only -static sh2_write_handler *sh2_write8_map[0x80], *sh2_write16_map[0x80]; +static sh2_write_handler *msh2_write8_map[0x80], *msh2_write16_map[0x80], *msh2_write32_map[0x80]; +static 
sh2_write_handler *ssh2_write8_map[0x80], *ssh2_write16_map[0x80], *ssh2_write32_map[0x80]; void Pico32xSwapDRAM(int b) { - cpu68k_map_set(m68k_read8_map, 0x840000, 0x85ffff, Pico32xMem->dram[b], 0); - cpu68k_map_set(m68k_read16_map, 0x840000, 0x85ffff, Pico32xMem->dram[b], 0); - cpu68k_map_set(m68k_read8_map, 0x860000, 0x87ffff, Pico32xMem->dram[b], 0); - cpu68k_map_set(m68k_read16_map, 0x860000, 0x87ffff, Pico32xMem->dram[b], 0); + cpu68k_map_read_mem(0x840000, 0x85ffff, Pico32xMem->dram[b], 0); + cpu68k_map_read_mem(0x860000, 0x87ffff, Pico32xMem->dram[b], 0); cpu68k_map_set(m68k_write8_map, 0x840000, 0x87ffff, b ? m68k_write8_dram1_ow : m68k_write8_dram0_ow, 1); cpu68k_map_set(m68k_write16_map, 0x840000, 0x87ffff, b ? m68k_write16_dram1_ow : m68k_write16_dram0_ow, 1); // SH2 - sh2_read8_map[0x04/2].addr = sh2_read8_map[0x24/2].addr = - sh2_read16_map[0x04/2].addr = sh2_read16_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); + msh2_read8_map[0x04/2].addr = msh2_read8_map[0x24/2].addr = + msh2_read16_map[0x04/2].addr = msh2_read16_map[0x24/2].addr = + msh2_read32_map[0x04/2].addr = msh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); + ssh2_read8_map[0x04/2].addr = ssh2_read8_map[0x24/2].addr = + ssh2_read16_map[0x04/2].addr = ssh2_read16_map[0x24/2].addr = + ssh2_read32_map[0x04/2].addr = ssh2_read32_map[0x24/2].addr = MAP_MEMORY(Pico32xMem->dram[b]); - sh2_write8_map[0x04/2] = sh2_write8_map[0x24/2] = b ? sh2_write8_dram1 : sh2_write8_dram0; - sh2_write16_map[0x04/2] = sh2_write16_map[0x24/2] = b ? 
sh2_write16_dram1 : sh2_write16_dram0; + // convenience ptrs + msh2.p_dram = ssh2.p_dram = Pico32xMem->dram[b]; +} + +static void bank_switch_rom_sh2(void) +{ + if (!carthw_ssf2_active) { + // easy + msh2_read8_map[0x02/2].addr = msh2_read8_map[0x22/2].addr = + msh2_read16_map[0x02/2].addr = msh2_read16_map[0x22/2].addr = + msh2_read32_map[0x02/2].addr = msh2_read32_map[0x22/2].addr = MAP_MEMORY(Pico.rom); + ssh2_read8_map[0x02/2].addr = ssh2_read8_map[0x22/2].addr = + ssh2_read16_map[0x02/2].addr = ssh2_read16_map[0x22/2].addr = + ssh2_read32_map[0x02/2].addr = ssh2_read32_map[0x22/2].addr = MAP_MEMORY(Pico.rom); + } + else { + msh2_read8_map[0x02/2].addr = msh2_read8_map[0x22/2].addr = MAP_HANDLER(sh2_read8_rom); + msh2_read16_map[0x02/2].addr = msh2_read16_map[0x22/2].addr = MAP_HANDLER(sh2_read16_rom); + msh2_read32_map[0x02/2].addr = msh2_read32_map[0x22/2].addr = MAP_HANDLER(sh2_read32_rom); + ssh2_read8_map[0x02/2].addr = ssh2_read8_map[0x22/2].addr = MAP_HANDLER(sh2_read8_rom); + ssh2_read16_map[0x02/2].addr = ssh2_read16_map[0x22/2].addr = MAP_HANDLER(sh2_read16_rom); + ssh2_read32_map[0x02/2].addr = ssh2_read32_map[0x22/2].addr = MAP_HANDLER(sh2_read32_rom); + } } void PicoMemSetup32x(void) @@ -1753,12 +2339,6 @@ void PicoMemSetup32x(void) unsigned int rs; int i; - Pico32xMem = plat_mmap(0x06000000, sizeof(*Pico32xMem), 0, 0); - if (Pico32xMem == NULL) { - elprintf(EL_STATUS, "OOM"); - return; - } - get_bios(); // cartridge area becomes unmapped @@ -1782,15 +2362,9 @@ void PicoMemSetup32x(void) cpu68k_map_set(m68k_read16_map, 0x880000, 0x880000 + rs - 1, Pico.rom, 0); cpu68k_map_set(m68k_write8_map, 0x880000, 0x880000 + rs - 1, PicoWrite8_cart, 1); cpu68k_map_set(m68k_write16_map, 0x880000, 0x880000 + rs - 1, PicoWrite16_cart, 1); -#ifdef EMU_F68K - // setup FAME fetchmap - PicoCpuFM68k.Fetch[0] = (unsigned long)Pico32xMem->m68k_rom; - for (rs = 0x88; rs < 0x90; rs++) - PicoCpuFM68k.Fetch[rs] = (unsigned long)Pico.rom - 0x880000; -#endif // 32X ROM 
(banked) - bank_switch(0); + bank_switch_rom_68k(Pico32x.regs[4 / 2]); cpu68k_map_set(m68k_write8_map, 0x900000, 0x9fffff, PicoWrite8_bank, 1); cpu68k_map_set(m68k_write16_map, 0x900000, 0x9fffff, PicoWrite16_bank, 1); } @@ -1801,84 +2375,154 @@ void PicoMemSetup32x(void) cpu68k_map_set(m68k_write8_map, 0xa10000, 0xa1ffff, PicoWrite8_32x_on, 1); cpu68k_map_set(m68k_write16_map, 0xa10000, 0xa1ffff, PicoWrite16_32x_on, 1); - // SH2 maps: A31,A30,A29,CS1,CS0 - // all unmapped by default - for (i = 0; i < ARRAY_SIZE(sh2_read8_map); i++) { - sh2_read8_map[i].addr = MAP_HANDLER(sh2_read8_unmapped); - sh2_read16_map[i].addr = MAP_HANDLER(sh2_read16_unmapped); + // TODO: cd + carthw + if (PicoIn.AHW & PAHW_MCD) { + m68k_write8_io = PicoWrite8_32x_on_io_cd; + m68k_write16_io = PicoWrite16_32x_on_io_cd; + } + else if (carthw_ssf2_active) { + m68k_write8_io = PicoWrite8_32x_on_io_ssf2; + m68k_write16_io = PicoWrite16_32x_on_io_ssf2; + } + else { + m68k_write8_io = PicoWrite8_32x_on_io; + m68k_write16_io = PicoWrite16_32x_on_io; } - for (i = 0; i < ARRAY_SIZE(sh2_write8_map); i++) { - sh2_write8_map[i] = sh2_write8_unmapped; - sh2_write16_map[i] = sh2_write16_unmapped; + // SH2 maps: A31,A30,A29,CS1,CS0 + // all unmapped by default + for (i = 0; i < ARRAY_SIZE(msh2_read8_map); i++) { + msh2_read8_map[i].addr = MAP_HANDLER(sh2_read8_unmapped); + msh2_read16_map[i].addr = MAP_HANDLER(sh2_read16_unmapped); + msh2_read32_map[i].addr = MAP_HANDLER(sh2_read32_unmapped); + } + + for (i = 0; i < ARRAY_SIZE(msh2_write8_map); i++) { + msh2_write8_map[i] = sh2_write8_unmapped; + msh2_write16_map[i] = sh2_write16_unmapped; + msh2_write32_map[i] = sh2_write32_unmapped; } // "purge area" for (i = 0x40; i <= 0x5f; i++) { - sh2_write8_map[i >> 1] = - sh2_write16_map[i >> 1] = sh2_write_ignore; + msh2_write8_map[i >> 1] = + msh2_write16_map[i >> 1] = + msh2_write32_map[i >> 1] = sh2_write_ignore; } // CS0 - sh2_read8_map[0x00/2].addr = sh2_read8_map[0x20/2].addr = MAP_HANDLER(sh2_read8_cs0); - 
sh2_read16_map[0x00/2].addr = sh2_read16_map[0x20/2].addr = MAP_HANDLER(sh2_read16_cs0); - sh2_write8_map[0x00/2] = sh2_write8_map[0x20/2] = sh2_write8_cs0; - sh2_write16_map[0x00/2] = sh2_write16_map[0x20/2] = sh2_write16_cs0; + msh2_read8_map[0x00/2].addr = msh2_read8_map[0x20/2].addr = MAP_HANDLER(sh2_read8_cs0); + msh2_read16_map[0x00/2].addr = msh2_read16_map[0x20/2].addr = MAP_HANDLER(sh2_read16_cs0); + msh2_read32_map[0x00/2].addr = msh2_read32_map[0x20/2].addr = MAP_HANDLER(sh2_read32_cs0); + msh2_write8_map[0x00/2] = msh2_write8_map[0x20/2] = sh2_write8_cs0; + msh2_write16_map[0x00/2] = msh2_write16_map[0x20/2] = sh2_write16_cs0; + msh2_write32_map[0x00/2] = msh2_write32_map[0x20/2] = sh2_write32_cs0; // CS1 - ROM - sh2_read8_map[0x02/2].addr = sh2_read8_map[0x22/2].addr = - sh2_read16_map[0x02/2].addr = sh2_read16_map[0x22/2].addr = MAP_MEMORY(Pico.rom); - sh2_read8_map[0x02/2].mask = sh2_read8_map[0x22/2].mask = - sh2_read16_map[0x02/2].mask = sh2_read16_map[0x22/2].mask = 0x3fffff; // FIXME - // CS2 - DRAM - done by Pico32xSwapDRAM() - sh2_read8_map[0x04/2].mask = sh2_read8_map[0x24/2].mask = - sh2_read16_map[0x04/2].mask = sh2_read16_map[0x24/2].mask = 0x01ffff; + bank_switch_rom_sh2(); + for (rs = 0x8000; rs < Pico.romsize && rs < 0x400000; rs *= 2) ; + msh2_read8_map[0x02/2].mask = msh2_read8_map[0x22/2].mask = rs-1; + msh2_read16_map[0x02/2].mask = msh2_read16_map[0x22/2].mask = rs-1; + msh2_read32_map[0x02/2].mask = msh2_read32_map[0x22/2].mask = rs-1; + msh2_write16_map[0x02/2] = msh2_write16_map[0x22/2] = sh2_write16_rom; + msh2_write32_map[0x02/2] = msh2_write32_map[0x22/2] = sh2_write32_rom; + // CS2 - DRAM + msh2_read8_map[0x04/2].mask = msh2_read8_map[0x24/2].mask = 0x01ffff; + msh2_read16_map[0x04/2].mask = msh2_read16_map[0x24/2].mask = 0x01fffe; + msh2_read32_map[0x04/2].mask = msh2_read32_map[0x24/2].mask = 0x01fffc; + msh2_write8_map[0x04/2] = msh2_write8_map[0x24/2] = sh2_write8_dram; + msh2_write16_map[0x04/2] = 
msh2_write16_map[0x24/2] = sh2_write16_dram; + msh2_write32_map[0x04/2] = msh2_write32_map[0x24/2] = sh2_write32_dram; + // CS3 - SDRAM - sh2_read8_map[0x06/2].addr = sh2_read8_map[0x26/2].addr = - sh2_read16_map[0x06/2].addr = sh2_read16_map[0x26/2].addr = MAP_MEMORY(Pico32xMem->sdram); - sh2_write8_map[0x06/2] = sh2_write8_sdram; - sh2_write8_map[0x26/2] = sh2_write8_sdram_wt; - sh2_write16_map[0x06/2] = sh2_write16_map[0x26/2] = sh2_write16_sdram; - sh2_read8_map[0x06/2].mask = sh2_read8_map[0x26/2].mask = - sh2_read16_map[0x06/2].mask = sh2_read16_map[0x26/2].mask = 0x03ffff; + msh2_read8_map[0x06/2].addr = msh2_read8_map[0x26/2].addr = + msh2_read16_map[0x06/2].addr = msh2_read16_map[0x26/2].addr = + msh2_read32_map[0x06/2].addr = msh2_read32_map[0x26/2].addr = MAP_MEMORY(Pico32xMem->sdram); + msh2_write8_map[0x06/2] = msh2_write8_map[0x26/2] = sh2_write8_sdram; + + msh2_write16_map[0x06/2] = msh2_write16_map[0x26/2] = sh2_write16_sdram; + msh2_write32_map[0x06/2] = msh2_write32_map[0x26/2] = sh2_write32_sdram; + msh2_read8_map[0x06/2].mask = msh2_read8_map[0x26/2].mask = 0x03ffff; + msh2_read16_map[0x06/2].mask = msh2_read16_map[0x26/2].mask = 0x03fffe; + msh2_read32_map[0x06/2].mask = msh2_read32_map[0x26/2].mask = 0x03fffc; // SH2 data array - sh2_read8_map[0xc0/2].addr = MAP_HANDLER(sh2_read8_da); - sh2_read16_map[0xc0/2].addr = MAP_HANDLER(sh2_read16_da); - sh2_write8_map[0xc0/2] = sh2_write8_da; - sh2_write16_map[0xc0/2] = sh2_write16_da; + msh2_read8_map[0xc0/2].mask = 0x0fff; + msh2_read16_map[0xc0/2].mask = 0x0ffe; + msh2_read32_map[0xc0/2].mask = 0x0ffc; + msh2_write8_map[0xc0/2] = sh2_write8_da; + msh2_write16_map[0xc0/2] = sh2_write16_da; + msh2_write32_map[0xc0/2] = sh2_write32_da; // SH2 IO - sh2_read8_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read8); - sh2_read16_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read16); - sh2_write8_map[0xff/2] = sh2_peripheral_write8; - sh2_write16_map[0xff/2] = sh2_peripheral_write16; + 
msh2_read8_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read8); + msh2_read16_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read16); + msh2_read32_map[0xff/2].addr = MAP_HANDLER(sh2_peripheral_read32); + msh2_write8_map[0xff/2] = sh2_peripheral_write8; + msh2_write16_map[0xff/2] = sh2_peripheral_write16; + msh2_write32_map[0xff/2] = sh2_peripheral_write32; + + memcpy(ssh2_read8_map, msh2_read8_map, sizeof(msh2_read8_map)); + memcpy(ssh2_read16_map, msh2_read16_map, sizeof(msh2_read16_map)); + memcpy(ssh2_read32_map, msh2_read32_map, sizeof(msh2_read32_map)); + memcpy(ssh2_write8_map, msh2_write8_map, sizeof(msh2_write8_map)); + memcpy(ssh2_write16_map, msh2_write16_map, sizeof(msh2_write16_map)); + memcpy(ssh2_write32_map, msh2_write32_map, sizeof(msh2_write32_map)); + + msh2_read8_map[0xc0/2].addr = + msh2_read16_map[0xc0/2].addr = + msh2_read32_map[0xc0/2].addr = MAP_MEMORY(msh2.data_array); + ssh2_read8_map[0xc0/2].addr = + ssh2_read16_map[0xc0/2].addr = + ssh2_read32_map[0xc0/2].addr = MAP_MEMORY(ssh2.data_array); // map DRAM area, both 68k and SH2 - Pico32xSwapDRAM(1); + Pico32xSwapDRAM((Pico32x.vdp_regs[0x0a / 2] & P32XV_FS) ^ P32XV_FS); - msh2.read8_map = ssh2.read8_map = sh2_read8_map; - msh2.read16_map = ssh2.read16_map = sh2_read16_map; - msh2.write8_tab = ssh2.write8_tab = (const void **)(void *)sh2_write8_map; - msh2.write16_tab = ssh2.write16_tab = (const void **)(void *)sh2_write16_map; + msh2.read8_map = msh2_read8_map; ssh2.read8_map = ssh2_read8_map; + msh2.read16_map = msh2_read16_map; ssh2.read16_map = ssh2_read16_map; + msh2.read32_map = msh2_read32_map; ssh2.read32_map = ssh2_read32_map; + msh2.write8_tab = (const void **)(void *)msh2_write8_map; + msh2.write16_tab = (const void **)(void *)msh2_write16_map; + msh2.write32_tab = (const void **)(void *)msh2_write32_map; + ssh2.write8_tab = (const void **)(void *)ssh2_write8_map; + ssh2.write16_tab = (const void **)(void *)ssh2_write16_map; + ssh2.write32_tab = (const void **)(void 
*)ssh2_write32_map; + + // convenience ptrs + msh2.p_sdram = ssh2.p_sdram = Pico32xMem->sdram; + msh2.p_rom = ssh2.p_rom = Pico.rom; + msh2.p_bios = Pico32xMem->sh2_rom_m.w; msh2.p_da = msh2.data_array; + ssh2.p_bios = Pico32xMem->sh2_rom_s.w; ssh2.p_da = ssh2.data_array; sh2_drc_mem_setup(&msh2); sh2_drc_mem_setup(&ssh2); + memset(sh2_poll_rd, 0, sizeof(sh2_poll_rd)); + memset(sh2_poll_wr, 0, sizeof(sh2_poll_wr)); + memset(sh2_poll_fifo, -1, sizeof(sh2_poll_fifo)); // z80 hack z80_map_set(z80_write_map, 0x8000, 0xffff, z80_md_bank_write_32x, 1); } +void p32x_update_banks(void) +{ + bank_switch_rom_68k(Pico32x.regs[4 / 2]); + bank_switch_rom_sh2(); + if (Pico32x.emu_flags & P32XF_DRC_ROM_C) + sh2_drc_flush_all(); +} + void Pico32xMemStateLoaded(void) { - bank_switch(Pico32x.regs[4 / 2]); + bank_switch_rom_68k(Pico32x.regs[4 / 2]); Pico32xSwapDRAM((Pico32x.vdp_regs[0x0a / 2] & P32XV_FS) ^ P32XV_FS); memset(Pico32xMem->pwm, 0, sizeof(Pico32xMem->pwm)); Pico32x.dirty_pal = 1; - Pico32x.emu_flags &= ~(P32XF_68KCPOLL | P32XF_68KVPOLL); memset(&m68k_poll, 0, sizeof(m68k_poll)); msh2.state = 0; msh2.poll_addr = msh2.poll_cycles = msh2.poll_cnt = 0; ssh2.state = 0; ssh2.poll_addr = ssh2.poll_cycles = ssh2.poll_cnt = 0; + memset(sh2_poll_fifo, 0, sizeof(sh2_poll_fifo)); sh2_drc_flush_all(); } diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S new file mode 100644 index 00000000..1af4366b --- /dev/null +++ b/pico/32x/memory_arm.S @@ -0,0 +1,288 @@ +/* + * PicoDrive 32X memory access functions, assembler version + * (C) irixxxx, 2018-2021 + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ + +#include + +@ 32X bank sizes... 
TODO this should somehow come from an include file +.equ SH2_ROM_SHIFT, 10 @ 0x003fffff +.equ SH2_RAM_SHIFT, 14 @ 0x0003ffff +.equ SH2_DRAM_SHIFT,15 @ 0x0001ffff +.equ SH2_DA_SHIFT, 20 @ 0x00000fff + +.equ SH2_DRAM_OW, 1<<(32-SH2_DRAM_SHIFT) @ DRAM overwrite mode bit + +.text +.align 5 + +#if 0 +@ u32 a, SH2 *sh2 +.global sh2_read8_rom +.global sh2_read8_sdram +.global sh2_read8_da +.global sh2_read8_dram +.global sh2_read16_rom +.global sh2_read16_sdram +.global sh2_read16_da +.global sh2_read16_dram +.global sh2_read32_rom +.global sh2_read32_sdram +.global sh2_read32_da +.global sh2_read32_dram +#endif + +@ u32 a, u32 d, SH2 *sh2 +.global sh2_write8_sdram +.global sh2_write8_da +.global sh2_write8_dram +.global sh2_write16_sdram +.global sh2_write16_da +.global sh2_write16_dram +.global sh2_write32_sdram +.global sh2_write32_da +.global sh2_write32_dram + +#if 0 +sh2_read8_rom: + ldr ip, [r1, #OFS_SH2_p_rom] + eor r0, r0, #1 + mov r0, r0, lsl #SH2_ROM_SHIFT + ldrb r0, [ip, r0, lsr #SH2_ROM_SHIFT] + bx lr + +sh2_read8_sdram: + ldr ip, [r1, #OFS_SH2_p_sdram] + eor r0, r0, #1 + mov r0, r0, lsl #SH2_RAM_SHIFT + ldrb r0, [ip, r0, lsr #SH2_RAM_SHIFT] + bx lr + +sh2_read8_da: + ldr ip, [r1, #OFS_SH2_p_da] + eor r0, r0, #1 + mov r0, r0, lsl #SH2_DA_SHIFT + ldrb r0, [ip, r0, lsr #SH2_DA_SHIFT] + bx lr + +sh2_read8_dram: + ldr ip, [r1, #OFS_SH2_p_dram] + eor r0, r0, #1 + mov r0, r0, lsl #SH2_DRAM_SHIFT + ldrb r0, [ip, r0, lsr #SH2_DRAM_SHIFT] + bx lr + +sh2_read16_rom: + ldr ip, [r1, #OFS_SH2_p_rom] + mov r0, r0, lsl #SH2_ROM_SHIFT + mov r0, r0, lsr #SH2_ROM_SHIFT + ldrh r0, [ip, r0] + bx lr + +sh2_read16_sdram: + ldr ip, [r1, #OFS_SH2_p_sdram] + mov r0, r0, lsl #SH2_RAM_SHIFT + mov r0, r0, lsr #SH2_RAM_SHIFT + ldrh r0, [ip, r0] + bx lr + +sh2_read16_da: + ldr ip, [r1, #OFS_SH2_p_da] + mov r0, r0, lsl #SH2_DA_SHIFT + mov r0, r0, lsr #SH2_DA_SHIFT + ldrh r0, [ip, r0] + bx lr + +sh2_read16_dram: + ldr ip, [r1, #OFS_SH2_p_dram] + mov r0, r0, lsl #SH2_DRAM_SHIFT + mov r0, r0, 
lsr #SH2_DRAM_SHIFT + ldrh r0, [ip, r0] + bx lr + +sh2_read32_rom: + ldr ip, [r1, #OFS_SH2_p_rom] + mov r0, r0, lsl #SH2_ROM_SHIFT + ldr r0, [ip, r0, lsr #SH2_ROM_SHIFT] + mov r0, r0, ror #16 + bx lr + +sh2_read32_sdram: + ldr ip, [r1, #OFS_SH2_p_sdram] + mov r0, r0, lsl #SH2_RAM_SHIFT + ldr r0, [ip, r0, lsr #SH2_RAM_SHIFT] + mov r0, r0, ror #16 + bx lr + +sh2_read32_da: + ldr ip, [r1, #OFS_SH2_p_da] + mov r0, r0, lsl #SH2_DA_SHIFT + ldr r0, [ip, r0, lsr #SH2_DA_SHIFT] + mov r0, r0, ror #16 + bx lr + +sh2_read32_dram: + ldr ip, [r1, #OFS_SH2_p_dram] + mov r0, r0, lsl #SH2_DRAM_SHIFT + ldr r0, [ip, r0, lsr #SH2_DRAM_SHIFT] + mov r0, r0, ror #16 + bx lr +#endif + +sh2_write8_sdram: + @ preserve r0-r2 for tail call + ldr ip, [r2, #OFS_SH2_p_sdram] + eor r3, r0, #1 + mov r3, r3, lsl #SH2_RAM_SHIFT + strb r1, [ip, r3, lsr #SH2_RAM_SHIFT] +#ifdef DRC_SH2 + ldr r1, [r2, #OFS_SH2_p_drcblk_ram] + ldrb r3, [r1, r3, lsr #SH2_RAM_SHIFT+1] + cmp r3, #0 + bxeq lr + @ need to load aligned 16 bit data for check + bic r0, r0, #1 + mov r1, r0, lsl #SH2_RAM_SHIFT + mov r1, r1, lsr #SH2_RAM_SHIFT + ldrh r1, [ip, r1] + b sh2_sdram_checks +#else + bx lr +#endif + +sh2_write8_da: + @ preserve r0 and r2 for tail call + ldr ip, [r2, #OFS_SH2_p_da] + eor r3, r0, #1 + mov r3, r3, lsl #SH2_DA_SHIFT + strb r1, [ip, r3, lsr #SH2_DA_SHIFT] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_da] + ldrb r1, [ip, r3, lsr #SH2_DA_SHIFT+1] + bic r0, r0, #1 + cmp r1, #0 + bxeq lr + mov r1, #2 + b sh2_drc_wcheck_da +#else + bx lr +#endif + +sh2_write8_dram: + tst r1, #0xff + ldrne ip, [r2, #OFS_SH2_p_dram] + eorne r3, r0, #1 + movne r3, r3, lsl #SH2_DRAM_SHIFT + strneb r1, [ip, r3, lsr #SH2_DRAM_SHIFT] + bx lr + +sh2_write16_sdram: + @ preserve r0-r2 for tail call + ldr ip, [r2, #OFS_SH2_p_sdram] + mov r3, r0, lsl #SH2_RAM_SHIFT + mov r3, r3, lsr #SH2_RAM_SHIFT + strh r1, [ip, r3] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_ram] + ldrb r3, [ip, r3, lsr #1] + cmp r3, #0 + bxeq lr + b sh2_sdram_checks 
+#else + bx lr +#endif + +sh2_write16_da: + @ preserve r0 and r2 for tail call + ldr ip, [r2, #OFS_SH2_p_da] + mov r3, r0, lsl #SH2_DA_SHIFT + mov r3, r3, lsr #SH2_DA_SHIFT + strh r1, [ip, r3] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_da] + ldrb r1, [ip, r3, lsr #1] + cmp r1, #0 + bxeq lr + mov r1, #2 + b sh2_drc_wcheck_da +#else + bx lr +#endif + +sh2_write16_dram: + ldr ip, [r2, #OFS_SH2_p_dram] + tst r0, #SH2_DRAM_OW + mov r3, r0, lsl #SH2_DRAM_SHIFT + mov r3, r3, lsr #SH2_DRAM_SHIFT + streqh r1, [ip, r3] + bxeq lr + add ip, ip, r3 + tst r1, #0xff + strneb r1, [ip, #0] + tst r1, #0xff00 + movne r1, r1, lsr #8 + strneb r1, [ip, #1] + bx lr + +sh2_write32_sdram: + @ preserve r0-r2 for tail call + ldr ip, [r2, #OFS_SH2_p_sdram] + mov r1, r1, ror #16 + mov r3, r0, lsl #SH2_RAM_SHIFT + str r1, [ip, r3, lsr #SH2_RAM_SHIFT] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_ram] + ldrb r3, [ip, r3, lsr #SH2_RAM_SHIFT+1]! + ldrb ip, [ip, #1] + orrs r3, r3, ip, lsl #16 + bxeq lr + mov r1, r1, ror #16 + b sh2_sdram_checks_l +#else + bx lr +#endif + +sh2_write32_da: + @ preserve r0 and r2 for tail call + ldr ip, [r2, #OFS_SH2_p_da] + mov r1, r1, ror #16 + mov r3, r0, lsl #SH2_DA_SHIFT + str r1, [ip, r3, lsr #SH2_DA_SHIFT] +#ifdef DRC_SH2 + ldr ip, [r2, #OFS_SH2_p_drcblk_da] + ldrb r1, [ip, r3, lsr #SH2_DA_SHIFT+1]! 
+ ldrb ip, [ip, #1] + orrs r1, r1, ip, lsl #16 + bxeq lr + mov r1, #4 + b sh2_drc_wcheck_da +#else + bx lr +#endif + +sh2_write32_dram: + ldr ip, [r2, #OFS_SH2_p_dram] + tst r0, #SH2_DRAM_OW + mov r3, r0, lsl #SH2_DRAM_SHIFT + mov r1, r1, ror #16 + streq r1, [ip, r3, lsr #SH2_DRAM_SHIFT] + bxeq lr + ldr r0, [ip, r3, lsr #SH2_DRAM_SHIFT] + tst r1, #0x00ff0000 + bicne r0, r0, #0x00ff0000 + tst r1, #0xff000000 + bicne r0, r0, #0xff000000 + tst r1, #0x000000ff + bicne r0, r0, #0x000000ff + tst r1, #0x0000ff00 + bicne r0, r0, #0x0000ff00 + orr r0, r0, r1 + str r0, [ip, r3, lsr #SH2_DRAM_SHIFT] + bx lr + +.pool + +@ vim:filetype=armasm diff --git a/pico/32x/pwm.c b/pico/32x/pwm.c index 1aba914f..6c66146e 100644 --- a/pico/32x/pwm.c +++ b/pico/32x/pwm.c @@ -1,46 +1,57 @@ /* * PicoDrive * (C) notaz, 2009,2010,2013 + * (C) irixxxx, 2019-2023 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. */ #include "../pico_int.h" -static int pwm_cycles; -static int pwm_mult; -static int pwm_ptr; -static int pwm_irq_reload; -static int pwm_doing_fifo; -static int pwm_silent; +static struct { + int cycles; + unsigned mult; + int ptr; + int irq_reload; + int doing_fifo; + int silent; + int irq_timer; + int irq_state; + short current[2]; +} pwm; + +enum { PWM_IRQ_LOCKED, PWM_IRQ_STOPPED, PWM_IRQ_LOW, PWM_IRQ_HIGH }; void p32x_pwm_ctl_changed(void) { int control = Pico32x.regs[0x30 / 2]; int cycles = Pico32x.regs[0x32 / 2]; + int pwm_irq_opt = PicoIn.opt & POPT_PWM_IRQ_OPT; cycles = (cycles - 1) & 0x0fff; - pwm_cycles = cycles; + pwm.cycles = cycles; // supposedly we should stop FIFO when xMd is 0, // but mars test disagrees - pwm_mult = 0; + pwm.mult = 0; if ((control & 0x0f) != 0) - pwm_mult = 0x10000 / cycles; + pwm.mult = (0x10000<<8) / (cycles+1); - pwm_irq_reload = (control & 0x0f00) >> 8; - pwm_irq_reload = ((pwm_irq_reload - 1) & 0x0f) + 1; + pwm.irq_timer = (control & 0x0f00) >> 8; + pwm.irq_timer = ((pwm.irq_timer - 1) & 
0x0f) + 1; + pwm.irq_reload = pwm.irq_timer; + pwm.irq_state = pwm_irq_opt ? PWM_IRQ_STOPPED: PWM_IRQ_LOCKED; - if (Pico32x.pwm_irq_cnt == 0) - Pico32x.pwm_irq_cnt = pwm_irq_reload; + if (Pico32x.pwm_irq_cnt <= 0) + Pico32x.pwm_irq_cnt = pwm.irq_reload; } static void do_pwm_irq(SH2 *sh2, unsigned int m68k_cycles) { - p32x_trigger_irq(sh2, m68k_cycles, P32XI_PWM); + p32x_trigger_irq(NULL, m68k_cycles, P32XI_PWM); if (Pico32x.regs[0x30 / 2] & P32XP_RTP) { - p32x_event_schedule(m68k_cycles, P32X_EVENT_PWM, pwm_cycles / 3 + 1); + p32x_event_schedule(m68k_cycles, P32X_EVENT_PWM, pwm.cycles / 3 + 1); // note: might recurse p32x_dreq1_trigger(); } @@ -48,16 +59,14 @@ static void do_pwm_irq(SH2 *sh2, unsigned int m68k_cycles) static int convert_sample(unsigned int v) { - if (v == 0) - return 0; - if (v > pwm_cycles) - v = pwm_cycles; - return ((int)v - pwm_cycles / 2) * pwm_mult; + if (v > pwm.cycles) + v = pwm.cycles; + return (v * pwm.mult >> 8) - 0x10000/2; } #define consume_fifo(sh2, m68k_cycles) { \ int cycles_diff = ((m68k_cycles) * 3) - Pico32x.pwm_cycle_p; \ - if (cycles_diff >= pwm_cycles) \ + if (cycles_diff >= pwm.cycles) \ consume_fifo_do(sh2, m68k_cycles, cycles_diff); \ } @@ -69,67 +78,70 @@ static void consume_fifo_do(SH2 *sh2, unsigned int m68k_cycles, unsigned short *fifo_r = mem->pwm_fifo[1]; int sum = 0; - if (pwm_cycles == 0 || pwm_doing_fifo) + if (pwm.cycles == 0 || pwm.doing_fifo) return; elprintf(EL_PWM, "pwm: %u: consume %d/%d, %d,%d ptr %d", - m68k_cycles, sh2_cycles_diff, sh2_cycles_diff / pwm_cycles, - Pico32x.pwm_p[0], Pico32x.pwm_p[1], pwm_ptr); + m68k_cycles, sh2_cycles_diff, sh2_cycles_diff / pwm.cycles, + Pico32x.pwm_p[0], Pico32x.pwm_p[1], pwm.ptr); // this is for recursion from dreq1 writes - pwm_doing_fifo = 1; + pwm.doing_fifo = 1; - for (; sh2_cycles_diff >= pwm_cycles; sh2_cycles_diff -= pwm_cycles) + while (sh2_cycles_diff >= pwm.cycles) { + sh2_cycles_diff -= pwm.cycles; + if (Pico32x.pwm_p[0] > 0) { - fifo_l[0] = fifo_l[1]; - 
fifo_l[1] = fifo_l[2]; - fifo_l[2] = fifo_l[3]; + mem->pwm_index[0] = (mem->pwm_index[0]+1) % 4; Pico32x.pwm_p[0]--; - mem->pwm_current[0] = convert_sample(fifo_l[0]); - sum += mem->pwm_current[0]; + pwm.current[0] = convert_sample(fifo_l[mem->pwm_index[0]]); + sum |= (u16)pwm.current[0]; } if (Pico32x.pwm_p[1] > 0) { - fifo_r[0] = fifo_r[1]; - fifo_r[1] = fifo_r[2]; - fifo_r[2] = fifo_r[3]; + mem->pwm_index[1] = (mem->pwm_index[1]+1) % 4; Pico32x.pwm_p[1]--; - mem->pwm_current[1] = convert_sample(fifo_r[0]); - sum += mem->pwm_current[1]; + pwm.current[1] = convert_sample(fifo_r[mem->pwm_index[1]]); + sum |= (u16)pwm.current[1]; } - mem->pwm[pwm_ptr * 2 ] = mem->pwm_current[0]; - mem->pwm[pwm_ptr * 2 + 1] = mem->pwm_current[1]; - pwm_ptr = (pwm_ptr + 1) & (PWM_BUFF_LEN - 1); + mem->pwm[pwm.ptr * 2 ] = pwm.current[0]; + mem->pwm[pwm.ptr * 2 + 1] = pwm.current[1]; + pwm.ptr = (pwm.ptr + 1) & (PWM_BUFF_LEN - 1); - if (--Pico32x.pwm_irq_cnt == 0) { - Pico32x.pwm_irq_cnt = pwm_irq_reload; + if (--Pico32x.pwm_irq_cnt <= 0) { + Pico32x.pwm_irq_cnt = pwm.irq_reload; do_pwm_irq(sh2, m68k_cycles); + } else if (Pico32x.pwm_p[1] == 0 && pwm.irq_state >= PWM_IRQ_LOW) { + // buffer underrun. Reduce reload rate if above programmed setting. 
+ if (pwm.irq_reload > pwm.irq_timer) + pwm.irq_reload--; + pwm.irq_state = PWM_IRQ_LOW; } } Pico32x.pwm_cycle_p = m68k_cycles * 3 - sh2_cycles_diff; - pwm_doing_fifo = 0; + pwm.doing_fifo = 0; if (sum != 0) - pwm_silent = 0; + pwm.silent = 0; } static int p32x_pwm_schedule_(SH2 *sh2, unsigned int m68k_now) { - unsigned int sh2_now = m68k_now * 3; + unsigned int pwm_now = m68k_now * 3; int cycles_diff_sh2; - if (pwm_cycles == 0) + if (pwm.cycles == 0) return 0; - cycles_diff_sh2 = sh2_now - Pico32x.pwm_cycle_p; - if (cycles_diff_sh2 >= pwm_cycles) + cycles_diff_sh2 = pwm_now - Pico32x.pwm_cycle_p; + if (cycles_diff_sh2 >= pwm.cycles) consume_fifo_do(sh2, m68k_now, cycles_diff_sh2); if (!((Pico32x.sh2irq_mask[0] | Pico32x.sh2irq_mask[1]) & 1)) return 0; // masked by everyone - cycles_diff_sh2 = sh2_now - Pico32x.pwm_cycle_p; - return (Pico32x.pwm_irq_cnt * pwm_cycles + cycles_diff_sh2 = pwm_now - Pico32x.pwm_cycle_p; + return (Pico32x.pwm_irq_cnt * pwm.cycles - cycles_diff_sh2) / 3 + 1; } @@ -158,29 +170,28 @@ void p32x_pwm_irq_event(unsigned int m68k_now) p32x_pwm_schedule(m68k_now); } -unsigned int p32x_pwm_read16(unsigned int a, SH2 *sh2, - unsigned int m68k_cycles) +unsigned int p32x_pwm_read16(u32 a, SH2 *sh2, unsigned int m68k_cycles) { unsigned int d = 0; consume_fifo(sh2, m68k_cycles); a &= 0x0e; - switch (a) { - case 0: // control - case 2: // cycle + switch (a/2) { + case 0/2: // control + case 2/2: // cycle d = Pico32x.regs[(0x30 + a) / 2]; break; - case 4: // L ch + case 4/2: // L ch if (Pico32x.pwm_p[0] == 3) d |= P32XP_FULL; else if (Pico32x.pwm_p[0] == 0) d |= P32XP_EMPTY; break; - case 6: // R ch - case 8: // MONO + case 6/2: // R ch + case 8/2: // MONO if (Pico32x.pwm_p[1] == 3) d |= P32XP_FULL; else if (Pico32x.pwm_p[1] == 0) @@ -193,54 +204,68 @@ unsigned int p32x_pwm_read16(unsigned int a, SH2 *sh2, return d; } -void p32x_pwm_write16(unsigned int a, unsigned int d, - SH2 *sh2, unsigned int m68k_cycles) +void p32x_pwm_write16(u32 a, unsigned int 
d, SH2 *sh2, unsigned int m68k_cycles) { + unsigned short *fifo; + int idx; + elprintf(EL_PWM, "pwm: %u: w16 %02x %04x (p %d %d)", m68k_cycles, a & 0x0e, d, Pico32x.pwm_p[0], Pico32x.pwm_p[1]); consume_fifo(sh2, m68k_cycles); a &= 0x0e; - if (a == 0) { // control - // avoiding pops.. - if ((Pico32x.regs[0x30 / 2] & 0x0f) == 0) - Pico32xMem->pwm_fifo[0][0] = Pico32xMem->pwm_fifo[1][0] = 0; - Pico32x.regs[0x30 / 2] = d; - p32x_pwm_ctl_changed(); - Pico32x.pwm_irq_cnt = pwm_irq_reload; // ? - } - else if (a == 2) { // cycle - Pico32x.regs[0x32 / 2] = d & 0x0fff; - p32x_pwm_ctl_changed(); - } - else if (a <= 8) { - d = (d - 1) & 0x0fff; - - if (a == 4 || a == 8) { // L ch or MONO - unsigned short *fifo = Pico32xMem->pwm_fifo[0]; + switch (a/2) { + case 0/2: // control + // avoiding pops.. + if ((Pico32x.regs[0x30 / 2] & 0x0f) == 0) + Pico32xMem->pwm_fifo[0][0] = Pico32xMem->pwm_fifo[1][0] = 0; + Pico32x.regs[0x30 / 2] = d; + p32x_pwm_ctl_changed(); + Pico32x.pwm_irq_cnt = pwm.irq_reload; // ? + break; + case 2/2: // cycle + Pico32x.regs[0x32 / 2] = d & 0x0fff; + p32x_pwm_ctl_changed(); + break; + case 8/2: // MONO + case 6/2: // R ch + fifo = Pico32xMem->pwm_fifo[1]; + idx = Pico32xMem->pwm_index[1]; + if (Pico32x.pwm_p[1] < 3) { + if (Pico32x.pwm_p[1] == 2 && pwm.irq_state >= PWM_IRQ_STOPPED) { + // buffer full. If there was no buffer underrun after last fill, + // try increasing reload rate to reduce IRQs + if (pwm.irq_reload < 3 && pwm.irq_state == PWM_IRQ_HIGH) + pwm.irq_reload ++; + pwm.irq_state = PWM_IRQ_HIGH; + } + Pico32x.pwm_p[1]++; + } else { + // buffer overflow. Some roms always fill the complete buffer even if + // reload rate is set below max. Lock reload rate to programmed setting. 
+ pwm.irq_reload = pwm.irq_timer; + pwm.irq_state = PWM_IRQ_LOCKED; + idx = (idx+1) % 4; + Pico32xMem->pwm_index[1] = idx; + } + fifo[(idx+Pico32x.pwm_p[1]) % 4] = (d - 1) & 0x0fff; + if (a != 8) break; // fallthrough if MONO + case 4/2: // L ch + fifo = Pico32xMem->pwm_fifo[0]; + idx = Pico32xMem->pwm_index[0]; if (Pico32x.pwm_p[0] < 3) Pico32x.pwm_p[0]++; else { - fifo[1] = fifo[2]; - fifo[2] = fifo[3]; + idx = (idx+1) % 4; + Pico32xMem->pwm_index[0] = idx; } - fifo[Pico32x.pwm_p[0]] = d; - } - if (a == 6 || a == 8) { // R ch or MONO - unsigned short *fifo = Pico32xMem->pwm_fifo[1]; - if (Pico32x.pwm_p[1] < 3) - Pico32x.pwm_p[1]++; - else { - fifo[1] = fifo[2]; - fifo[2] = fifo[3]; - } - fifo[Pico32x.pwm_p[1]] = d; - } + fifo[(idx+Pico32x.pwm_p[0]) % 4] = (d - 1) & 0x0fff; + break; } } -void p32x_pwm_update(int *buf32, int length, int stereo) +void p32x_pwm_update(s32 *buf32, int length, int stereo) { short *pwmb; int step; @@ -252,10 +277,10 @@ void p32x_pwm_update(int *buf32, int length, int stereo) xmd = Pico32x.regs[0x30 / 2] & 0x0f; if (xmd == 0 || xmd == 0x06 || xmd == 0x09 || xmd == 0x0f) goto out; // invalid? 
- if (pwm_silent) + if (pwm.silent) return; - step = (pwm_ptr << 16) / length; + step = (pwm.ptr << 16) / length; pwmb = Pico32xMem->pwm; if (stereo) @@ -310,13 +335,12 @@ void p32x_pwm_update(int *buf32, int length, int stereo) } } - elprintf(EL_PWM, "pwm_update: pwm_ptr %d, len %d, step %04x, done %d", - pwm_ptr, length, step, (pwmb - Pico32xMem->pwm) / 2); + elprintf(EL_PWM, "pwm_update: pwm.ptr %d, len %d, step %04x, done %d", + pwm.ptr, length, step, (pwmb - Pico32xMem->pwm) / 2); out: - pwm_ptr = 0; - pwm_silent = Pico32xMem->pwm_current[0] == 0 - && Pico32xMem->pwm_current[1] == 0; + pwm.ptr = 0; + pwm.silent = pwm.current[0] == 0 && pwm.current[1] == 0; } void p32x_pwm_state_loaded(void) @@ -326,11 +350,11 @@ void p32x_pwm_state_loaded(void) p32x_pwm_ctl_changed(); // for old savestates - cycles_diff_sh2 = SekCycleCnt * 3 - Pico32x.pwm_cycle_p; - if (cycles_diff_sh2 >= pwm_cycles || cycles_diff_sh2 < 0) { - Pico32x.pwm_irq_cnt = pwm_irq_reload; - Pico32x.pwm_cycle_p = SekCycleCnt * 3; - p32x_pwm_schedule(SekCycleCnt); + cycles_diff_sh2 = Pico.t.m68c_cnt * 3 - Pico32x.pwm_cycle_p; + if (cycles_diff_sh2 >= pwm.cycles || cycles_diff_sh2 < 0) { + Pico32x.pwm_irq_cnt = pwm.irq_reload; + Pico32x.pwm_cycle_p = Pico.t.m68c_cnt * 3; + p32x_pwm_schedule(Pico.t.m68c_cnt); } } diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index 62423d13..51169a88 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -1,6 +1,7 @@ /* * SH2 peripherals/"system on chip" * (C) notaz, 2013 + * (C) irixxxx, 2019-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
@@ -25,11 +26,14 @@ #include "../pico_int.h" #include "../memory.h" +#include +DRC_DECLARE_SR; + // DMAC handling struct dma_chan { - unsigned int sar, dar; // src, dst addr - unsigned int tcr; // transfer count - unsigned int chcr; // chan ctl + u32 sar, dar; // src, dst addr + u32 tcr; // transfer count + u32 chcr; // chan ctl // -- dm dm sm sm ts ts ar am al ds dl tb ta ie te de // ts - transfer size: 1, 2, 4, 16 bytes // ar - auto request if 1, else dreq signal @@ -44,11 +48,11 @@ struct dma_chan { struct dmac { struct dma_chan chan[2]; - unsigned int vcrdma0; - unsigned int unknown0; - unsigned int vcrdma1; - unsigned int unknown1; - unsigned int dmaor; + u32 vcrdma0; + u32 unknown0; + u32 vcrdma1; + u32 unknown1; + u32 dmaor; // -- pr ae nmif dme // pr - priority: chan0 > chan1 or round-robin // ae - address error @@ -73,7 +77,7 @@ static void dmac_transfer_complete(SH2 *sh2, struct dma_chan *chan) { chan->chcr |= DMA_TE; // DMA has ended normally - p32x_sh2_poll_event(sh2, SH2_STATE_SLEEP, SekCyclesDone()); + p32x_sh2_poll_event(sh2->poll_addr, sh2, SH2_STATE_SLEEP, SekCyclesDone()); if (chan->chcr & DMA_IE) dmac_te_irq(sh2, chan); } @@ -87,6 +91,7 @@ static void dmac_transfer_one(SH2 *sh2, struct dma_chan *chan) case 0: d = p32x_sh2_read8(chan->sar, sh2); p32x_sh2_write8(chan->dar, d, sh2); + break; case 1: d = p32x_sh2_read16(chan->sar, sh2); p32x_sh2_write16(chan->dar, d, sh2); @@ -125,6 +130,25 @@ static void dmac_transfer_one(SH2 *sh2, struct dma_chan *chan) chan->sar += size; } +// optimization for copying around memory with SH2 DMA +static void dmac_memcpy(struct dma_chan *chan, SH2 *sh2) +{ + u32 size = (chan->chcr >> 10) & 3, up = chan->chcr & (1 << 14); + int count; + + if (!up || chan->tcr < 4) + return; + + if (size == 3) size = 2; // 4-word xfer mode still counts in words + // XXX check TCR being a multiple of 4 in 4-word xfer mode? + // XXX check alignment of sar/dar, generating a bus error if unaligned? 
+ count = p32x_sh2_memcpy(chan->dar, chan->sar, chan->tcr, 1 << size, sh2); + + chan->sar += count << size; + chan->dar += count << size; + chan->tcr -= count; +} + // DMA trigger by SH2 register write static void dmac_trigger(SH2 *sh2, struct dma_chan *chan) { @@ -134,6 +158,12 @@ static void dmac_trigger(SH2 *sh2, struct dma_chan *chan) if (chan->chcr & DMA_AR) { // auto-request transfer + sh2->state |= SH2_STATE_SLEEP; + if ((((chan->chcr >> 12) ^ (chan->chcr >> 14)) & 3) == 0 && + (((chan->chcr >> 14) ^ (chan->chcr >> 15)) & 1) == 1) { + // SM == DM and either DM0 or DM1 are set. check for mem to mem copy + dmac_memcpy(chan, sh2); + } while ((int)chan->tcr > 0) dmac_transfer_one(sh2, chan); dmac_transfer_complete(sh2, chan); @@ -160,8 +190,9 @@ static void dmac_trigger(SH2 *sh2, struct dma_chan *chan) } // timer state - FIXME -static int timer_cycles[2]; -static int timer_tick_cycles[2]; +static u32 timer_cycles[2]; +static u32 timer_tick_cycles[2]; +static u32 timer_tick_factor[2]; // timers void p32x_timers_recalc(void) @@ -171,6 +202,9 @@ void p32x_timers_recalc(void) // SH2 timer step for (i = 0; i < 2; i++) { + sh2s[i].state &= ~SH2_TIMER_RUN; + if (PREG8(sh2s[i].peri_regs, 0x80) & 0x20) // TME + sh2s[i].state |= SH2_TIMER_RUN; tmp = PREG8(sh2s[i].peri_regs, 0x80) & 7; // Sclk cycles per timer tick if (tmp) @@ -178,36 +212,35 @@ void p32x_timers_recalc(void) else cycles = 2; timer_tick_cycles[i] = cycles; + timer_tick_factor[i] = (1ULL << 32) / cycles; timer_cycles[i] = 0; elprintf(EL_32XP, "WDT cycles[%d] = %d", i, cycles); } } -void p32x_timers_do(unsigned int m68k_slice) +NOINLINE void p32x_timer_do(SH2 *sh2, unsigned int m68k_slice) { unsigned int cycles = m68k_slice * 3; - int cnt, i; + void *pregs = sh2->peri_regs; + int cnt; int i = sh2->is_slave; - // WDT timers - for (i = 0; i < 2; i++) { - void *pregs = sh2s[i].peri_regs; - if (PREG8(pregs, 0x80) & 0x20) { // TME - timer_cycles[i] += cycles; - cnt = PREG8(pregs, 0x81); - while (timer_cycles[i] >= 
timer_tick_cycles[i]) { - timer_cycles[i] -= timer_tick_cycles[i]; - cnt++; - } - if (cnt >= 0x100) { - int level = PREG8(pregs, 0xe3) >> 4; - int vector = PREG8(pregs, 0xe4) & 0x7f; - elprintf(EL_32XP, "%csh2 WDT irq (%d, %d)", - i ? 's' : 'm', level, vector); - sh2_internal_irq(&sh2s[i], level, vector); - cnt &= 0xff; - } - PREG8(pregs, 0x81) = cnt; + // WDT timer + timer_cycles[i] += cycles; + if (timer_cycles[i] > timer_tick_cycles[i]) { + // cnt = timer_cycles[i] / timer_tick_cycles[i]; + cnt = (1ULL * timer_cycles[i] * timer_tick_factor[i]) >> 32; + timer_cycles[i] -= timer_tick_cycles[i] * cnt; + + cnt += PREG8(pregs, 0x81); + if (cnt >= 0x100) { + int level = PREG8(pregs, 0xe3) >> 4; + int vector = PREG8(pregs, 0xe4) & 0x7f; + elprintf(EL_32XP, "%csh2 WDT irq (%d, %d)", + i ? 's' : 'm', level, vector); + sh2_internal_irq(sh2, level, vector); + cnt &= 0xff; } + PREG8(pregs, 0x81) = cnt; } } @@ -225,40 +258,62 @@ void sh2_peripheral_reset(SH2 *sh2) // SH2 internal peripheral memhandlers // we keep them in little endian format -u32 sh2_peripheral_read8(u32 a, SH2 *sh2) +u32 REGPARM(2) sh2_peripheral_read8(u32 a, SH2 *sh2) { u8 *r = (void *)sh2->peri_regs; u32 d; + DRC_SAVE_SR(sh2); a &= 0x1ff; d = PREG8(r, a); elprintf_sh2(sh2, EL_32XP, "peri r8 [%08x] %02x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); + if ((a & 0x1c0) == 0x140) { + // abused as comm area + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 3); + } + DRC_RESTORE_SR(sh2); return d; } -u32 sh2_peripheral_read16(u32 a, SH2 *sh2) +u32 REGPARM(2) sh2_peripheral_read16(u32 a, SH2 *sh2) { u16 *r = (void *)sh2->peri_regs; u32 d; - a &= 0x1ff; - d = r[(a / 2) ^ 1]; + DRC_SAVE_SR(sh2); + a &= 0x1fe; + d = r[MEM_BE2(a / 2)]; elprintf_sh2(sh2, EL_32XP, "peri r16 [%08x] %04x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); + if ((a & 0x1c0) == 0x140) { + // abused as comm area + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 3); + } + DRC_RESTORE_SR(sh2); return d; } -u32 sh2_peripheral_read32(u32 a, SH2 *sh2) +u32 REGPARM(2) 
sh2_peripheral_read32(u32 a, SH2 *sh2) { u32 d; + + DRC_SAVE_SR(sh2); a &= 0x1fc; d = sh2->peri_regs[a / 4]; elprintf_sh2(sh2, EL_32XP, "peri r32 [%08x] %08x @%06x", a | ~0x1ff, d, sh2_pc(sh2)); + if (a == 0x18c) + // kludge for polling COMM while polling for end of DMA + sh2->poll_cnt = 0; + else if ((a & 0x1c0) == 0x140) { + // abused as comm area + p32x_sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 3); + } + DRC_RESTORE_SR(sh2); return d; } @@ -302,18 +357,18 @@ void REGPARM(3) sh2_peripheral_write8(u32 a, u32 d, SH2 *sh2) u8 *r = (void *)sh2->peri_regs; u8 old; + DRC_SAVE_SR(sh2); elprintf_sh2(sh2, EL_32XP, "peri w8 [%08x] %02x @%06x", a, d, sh2_pc(sh2)); a &= 0x1ff; old = PREG8(r, a); + PREG8(r, a) = d; switch (a) { case 0x002: // SCR - serial control - if (!(PREG8(r, a) & 0x20) && (d & 0x20)) { // TE being set - PREG8(r, a) = d; + if (!(old & 0x20) && (d & 0x20)) // TE being set sci_trigger(sh2, r); - } break; case 0x003: // TDR - transmit data break; @@ -321,28 +376,35 @@ void REGPARM(3) sh2_peripheral_write8(u32 a, u32 d, SH2 *sh2) d = (old & (d | 0x06)) | (d & 1); PREG8(r, a) = d; sci_trigger(sh2, r); - return; + break; case 0x005: // RDR - receive data break; case 0x010: // TIER if (d & 0x8e) elprintf(EL_32XP|EL_ANOMALY, "TIER: %02x", d); d = (d & 0x8e) | 1; + PREG8(r, a) = d; break; case 0x017: // TOCR d |= 0xe0; + PREG8(r, a) = d; break; + default: + if ((a & 0x1c0) == 0x140) + p32x_sh2_poll_event(a, sh2, SH2_STATE_CPOLL, SekCyclesDone()); } - PREG8(r, a) = d; + DRC_RESTORE_SR(sh2); } void REGPARM(3) sh2_peripheral_write16(u32 a, u32 d, SH2 *sh2) { u16 *r = (void *)sh2->peri_regs; + + DRC_SAVE_SR(sh2); elprintf_sh2(sh2, EL_32XP, "peri w16 [%08x] %04x @%06x", a, d, sh2_pc(sh2)); - a &= 0x1ff; + a &= 0x1fe; // evil WDT if (a == 0x80) { @@ -352,17 +414,21 @@ void REGPARM(3) sh2_peripheral_write16(u32 a, u32 d, SH2 *sh2) } if ((d & 0xff00) == 0x5a00) // WTCNT PREG8(r, 0x81) = d; - return; + } else { + r[MEM_BE2(a / 2)] = d; + if ((a & 0x1c0) == 0x140) + 
p32x_sh2_poll_event(a, sh2, SH2_STATE_CPOLL, SekCyclesDone()); } - - r[(a / 2) ^ 1] = d; + DRC_RESTORE_SR(sh2); } void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) { u32 *r = sh2->peri_regs; u32 old; + struct dmac *dmac; + DRC_SAVE_SR(sh2); elprintf_sh2(sh2, EL_32XP, "peri w32 [%08x] %08x @%06x", a, d, sh2_pc(sh2)); @@ -370,24 +436,25 @@ void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) old = r[a / 4]; r[a / 4] = d; + // TODO: DRC doesn't correctly extend 'd' parameter register to 64bit :-/ switch (a) { // division unit (TODO: verify): case 0x104: // DVDNT: divident L, starts divide elprintf_sh2(sh2, EL_32XP, "divide %08x / %08x", - d, r[0x100 / 4]); + r[0x104 / 4], r[0x100 / 4]); if (r[0x100 / 4]) { signed int divisor = r[0x100 / 4]; - r[0x118 / 4] = r[0x110 / 4] = (signed int)d % divisor; - r[0x104 / 4] = r[0x11c / 4] = r[0x114 / 4] = (signed int)d / divisor; + r[0x118 / 4] = r[0x110 / 4] = (signed int)r[0x104 / 4] % divisor; + r[0x104 / 4] = r[0x11c / 4] = r[0x114 / 4] = (signed int)r[0x104 / 4] / divisor; } else r[0x110 / 4] = r[0x114 / 4] = r[0x118 / 4] = r[0x11c / 4] = 0; // ? break; case 0x114: elprintf_sh2(sh2, EL_32XP, "divide %08x%08x / %08x @%08x", - r[0x110 / 4], d, r[0x100 / 4], sh2_pc(sh2)); + r[0x110 / 4], r[0x114 / 4], r[0x100 / 4], sh2_pc(sh2)); if (r[0x100 / 4]) { - signed long long divident = (signed long long)r[0x110 / 4] << 32 | d; + signed long long divident = (signed long long)r[0x110 / 4] << 32 | r[0x114 / 4]; signed int divisor = r[0x100 / 4]; // XXX: undocumented mirroring to 0x118,0x11c? r[0x118 / 4] = r[0x110 / 4] = divident % divisor; @@ -402,21 +469,27 @@ void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) else r[0x110 / 4] = r[0x114 / 4] = r[0x118 / 4] = r[0x11c / 4] = 0; // ? break; + // perhaps starting a DMA? 
+ case 0x18c: + case 0x19c: + case 0x1b0: + dmac = (void *)&sh2->peri_regs[0x180 / 4]; + if (a == 0x1b0 && !((old ^ d) & d & DMA_DME)) + return; + if (!(dmac->dmaor & DMA_DME)) + return; + + if ((dmac->chan[0].chcr & (DMA_TE|DMA_DE)) == DMA_DE) + dmac_trigger(sh2, &dmac->chan[0]); + if ((dmac->chan[1].chcr & (DMA_TE|DMA_DE)) == DMA_DE) + dmac_trigger(sh2, &dmac->chan[1]); + break; + default: + if ((a & 0x1c0) == 0x140) + p32x_sh2_poll_event(a, sh2, SH2_STATE_CPOLL, SekCyclesDone()); } - // perhaps starting a DMA? - if (a == 0x1b0 || a == 0x18c || a == 0x19c) { - struct dmac *dmac = (void *)&sh2->peri_regs[0x180 / 4]; - if (a == 0x1b0 && !((old ^ d) & d & DMA_DME)) - return; - if (!(dmac->dmaor & DMA_DME)) - return; - - if ((dmac->chan[0].chcr & (DMA_TE|DMA_DE)) == DMA_DE) - dmac_trigger(sh2, &dmac->chan[0]); - if ((dmac->chan[1].chcr & (DMA_TE|DMA_DE)) == DMA_DE) - dmac_trigger(sh2, &dmac->chan[1]); - } + DRC_RESTORE_SR(sh2); } /* 32X specific */ @@ -466,7 +539,9 @@ static void dreq1_do(SH2 *sh2, struct dma_chan *chan) if ((chan->dar & ~0xf) != 0x20004030) elprintf(EL_32XP|EL_ANOMALY, "dreq1: bad dar?: %08x\n", chan->dar); + sh2->state |= SH2_STATE_SLEEP; dmac_transfer_one(sh2, chan); + sh2->state &= ~SH2_STATE_SLEEP; if (chan->tcr == 0) dmac_transfer_complete(sh2, chan); } diff --git a/pico/arm_features.h b/pico/arm_features.h index fdec5229..b772b77c 100644 --- a/pico/arm_features.h +++ b/pico/arm_features.h @@ -49,4 +49,32 @@ #endif +// indexed branch (XB) via branch table (BT) +#ifdef __PIC__ +#define PIC_XB(c,r,s) add##c pc, r, s +#define PIC_BT(a) b a +#else +#define PIC_XB(c,r,s) ldr##c pc, [pc, r, s] +#define PIC_BT(a) .word a +#endif + +// load data address (LDR) either via literal pool or via GOT +#ifdef __PIC__ +// can't use pool loads since ldr= only allows a symbol or a constant expr :-( +#define PIC_LDR_INIT() \ + .macro pic_ldr r t a; \ + ldr \r, [pc, $.LD\@-.-8]; \ + ldr \t, [pc, $.LD\@-.-4]; \ + .LP\@:add \r, pc; \ + ldr \r, [\r, \t]; \ + add pc, 
$4; \ + .LD\@:.word _GLOBAL_OFFSET_TABLE_-.LP\@-8; \ + .word \a(GOT); \ + .endm; +#define PIC_LDR(r,t,a) pic_ldr r, t, a +#else +#define PIC_LDR_INIT() +#define PIC_LDR(r,t,a) ldr r, =a +#endif + #endif /* __ARM_FEATURES_H__ */ diff --git a/pico/cart.c b/pico/cart.c index 6a835b63..a196dfdf 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -2,20 +2,25 @@ * PicoDrive * (c) Copyright Dave, 2004 * (C) notaz, 2006-2010 + * (C) irixxxx, 2020-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. */ #include "pico_int.h" -#include "../zlib/zlib.h" -#include "../cpu/debug.h" -#include "../unzip/unzip.h" -#include "../unzip/unzip_stream.h" +#include +#if defined(USE_LIBCHDR) +#include "libchdr/chd.h" +#include "libchdr/cdrom.h" +#endif + +#include +#include static int rom_alloc_size; -static const char *rom_exts[] = { "bin", "gen", "smd", "iso", "sms", "gg", "sg" }; +static const char *rom_exts[] = { "bin", "gen", "smd", "md", "32x", "pco", "iso", "sms", "gg", "sg", "sc" }; void (*PicoCartUnloadHook)(void); void (*PicoCartMemSetup)(void); @@ -26,6 +31,7 @@ void (*PicoCDLoadProgressCB)(const char *fname, int percent) = NULL; // handled int PicoGameLoaded; static void PicoCartDetect(const char *carthw_cfg); +static void PicoCartDetectMS(void); /* cso struct */ typedef struct _cso_struct @@ -50,7 +56,7 @@ typedef struct _cso_struct } cso_struct; -static int uncompress2(void *dest, int destLen, void *source, int sourceLen) +static int uncompress_buf(void *dest, int destLen, void *source, int sourceLen) { z_stream stream; int err; @@ -90,6 +96,29 @@ static const char *get_ext(const char *path) return ext; } +struct zip_file { + pm_file file; + ZIP *zip; + struct zipent *entry; + z_stream stream; + unsigned char inbuf[16384]; + long start; + unsigned int pos; +}; + +#if defined(USE_LIBCHDR) +struct chd_struct { + pm_file file; + int fpos; + int sectorsize; + chd_file *chd; + int unitbytes; + int hunkunits; + u8 *hunk; + int 
hunknum; +}; +#endif + pm_file *pm_open(const char *path) { pm_file *file = NULL; @@ -102,10 +131,10 @@ pm_file *pm_open(const char *path) ext = get_ext(path); if (strcasecmp(ext, "zip") == 0) { + struct zip_file *zfile = NULL; struct zipent *zipentry; - gzFile gzf = NULL; ZIP *zipfile; - int i; + int i, ret; zipfile = openzip(path); if (zipfile != NULL) @@ -127,37 +156,41 @@ pm_file *pm_open(const char *path) goto zip_failed; found_rom_zip: - /* try to convert to gzip stream, so we could use standard gzio functions from zlib */ - gzf = zip2gz(zipfile, zipentry); - if (gzf == NULL) goto zip_failed; - - file = calloc(1, sizeof(*file)); - if (file == NULL) goto zip_failed; - file->file = zipfile; - file->param = gzf; - file->size = zipentry->uncompressed_size; - file->type = PMT_ZIP; - strncpy(file->ext, ext, sizeof(file->ext) - 1); - return file; + zfile = calloc(1, sizeof(*zfile)); + if (zfile == NULL) + goto zip_failed; + ret = seekcompresszip(zipfile, zipentry); + if (ret != 0) + goto zip_failed; + ret = inflateInit2(&zfile->stream, -15); + if (ret != Z_OK) { + elprintf(EL_STATUS, "zip: inflateInit2 %d", ret); + goto zip_failed; + } + zfile->zip = zipfile; + zfile->entry = zipentry; + zfile->start = ftell(zipfile->fp); + zfile->file.file = zfile; + zfile->file.size = zipentry->uncompressed_size; + zfile->file.type = PMT_ZIP; + strncpy(zfile->file.ext, ext, sizeof(zfile->file.ext) - 1); + return &zfile->file; zip_failed: - if (gzf) { - gzclose(gzf); - zipfile->fp = NULL; // gzclose() closed it - } closezip(zipfile); + free(zfile); return NULL; } } else if (strcasecmp(ext, "cso") == 0) { cso_struct *cso = NULL, *tmp = NULL; - int size; + int i, size; f = fopen(path, "rb"); if (f == NULL) goto cso_failed; -#ifndef __EPOC32__ +#ifdef __GP2X__ /* we use our own buffering */ setvbuf(f, NULL, _IONBF, 0); #endif @@ -168,6 +201,9 @@ zip_failed: if (fread(&cso->header, 1, sizeof(cso->header), f) != sizeof(cso->header)) goto cso_failed; + cso->header.block_size = 
CPU_LE4(cso->header.block_size); + cso->header.total_bytes = CPU_LE4(cso->header.total_bytes); + cso->header.total_bytes_high = CPU_LE4(cso->header.total_bytes_high); if (strncmp(cso->header.magic, "CISO", 4) != 0) { elprintf(EL_STATUS, "cso: bad header"); @@ -191,6 +227,8 @@ zip_failed: elprintf(EL_STATUS, "cso: premature EOF"); goto cso_failed; } + for (i = 0; i < size/4; i++) + cso->index[i] = CPU_LE4(cso->index[i]); // all ok cso->fpos_in = ftell(f); @@ -202,6 +240,7 @@ zip_failed: file->param = cso; file->size = cso->header.total_bytes; file->type = PMT_CSO; + strncpy(file->ext, ext, sizeof(file->ext) - 1); return file; cso_failed: @@ -209,6 +248,50 @@ cso_failed: if (f != NULL) fclose(f); return NULL; } +#if defined(USE_LIBCHDR) + else if (strcasecmp(ext, "chd") == 0) + { + struct chd_struct *chd = NULL; + chd_file *cf = NULL; + const chd_header *head; + + if (chd_open(path, CHD_OPEN_READ, NULL, &cf) != CHDERR_NONE) + goto chd_failed; + + // sanity check + head = chd_get_header(cf); + if ((head->hunkbytes == 0) || (head->hunkbytes % CD_FRAME_SIZE)) + goto chd_failed; + + chd = calloc(1, sizeof(*chd)); + if (chd == NULL) + goto chd_failed; + chd->hunk = (u8 *)malloc(head->hunkbytes); + if (!chd->hunk) + goto chd_failed; + + chd->chd = cf; + chd->unitbytes = head->unitbytes; + chd->hunkunits = head->hunkbytes / head->unitbytes; + chd->sectorsize = CD_MAX_SECTOR_DATA; // default to RAW mode + + chd->fpos = 0; + chd->hunknum = -1; + + chd->file.file = chd; + chd->file.type = PMT_CHD; + // subchannel data is skipped, remove it from total size + chd->file.size = head->logicalbytes / CD_FRAME_SIZE * CD_MAX_SECTOR_DATA; + strncpy(chd->file.ext, ext, sizeof(chd->file.ext) - 1); + return &chd->file; + +chd_failed: + /* invalid CHD file */ + if (chd != NULL) free(chd); + if (cf != NULL) chd_close(cf); + return NULL; + } +#endif /* not a zip, treat as uncompressed file */ f = fopen(path, "rb"); @@ -227,7 +310,7 @@ cso_failed: strncpy(file->ext, ext, sizeof(file->ext) - 
1); fseek(f, 0, SEEK_SET); -#ifndef __EPOC32__ // makes things worse on Symbian +#ifdef __GP2X__ if (file->size > 0x400000) /* we use our own buffering */ setvbuf(f, NULL, _IONBF, 0); @@ -236,23 +319,127 @@ cso_failed: return file; } +void pm_sectorsize(int length, pm_file *stream) +{ + // CHD reading needs to know how much binary data is in one data sector(=unit) +#if defined(USE_LIBCHDR) + if (stream->type == PMT_CHD) { + struct chd_struct *chd = stream->file; + chd->sectorsize = length; + if (chd->sectorsize > chd->unitbytes) + elprintf(EL_STATUS|EL_ANOMALY, "cd: sector size %d too large for unit %d", chd->sectorsize, chd->unitbytes); + } +#endif +} + +#if defined(USE_LIBCHDR) +static size_t _pm_read_chd(void *ptr, size_t bytes, pm_file *stream, int is_audio) +{ + int ret = 0; + + if (stream->type == PMT_CHD) { + struct chd_struct *chd = stream->file; + // calculate sector and offset in sector + int sectsz = is_audio ? CD_MAX_SECTOR_DATA : chd->sectorsize; + int sector = chd->fpos / sectsz; + int offset = chd->fpos - (sector * sectsz); + // calculate hunk and sector offset in hunk + int hunknum = sector / chd->hunkunits; + int hunksec = sector - (hunknum * chd->hunkunits); + int hunkofs = hunksec * chd->unitbytes; + + while (bytes != 0) { + // data left in current sector + int len = sectsz - offset; + + // update hunk cache if needed + if (hunknum != chd->hunknum) { + chd_read(chd->chd, hunknum, chd->hunk); + chd->hunknum = hunknum; + } + if (len > bytes) + len = bytes; + +#if CPU_IS_LE + if (is_audio) { + // convert big endian audio samples + u16 *dst = ptr, v; + u8 *src = chd->hunk + hunkofs + offset; + int i; + + for (i = 0; i < len; i += 4) { + v = *src++ << 8; *dst++ = v | *src++; + v = *src++ << 8; *dst++ = v | *src++; + } + } else +#endif + memcpy(ptr, chd->hunk + hunkofs + offset, len); + + // house keeping + ret += len; + chd->fpos += len; + bytes -= len; + + // no need to advance internals if there's no more data to read + if (bytes) { + ptr += len; + 
offset = 0; + + sector ++; + hunksec ++; + hunkofs += chd->unitbytes; + if (hunksec >= chd->hunkunits) { + hunksec = 0; + hunkofs = 0; + hunknum ++; + } + } + } + } + + return ret; +} +#endif + size_t pm_read(void *ptr, size_t bytes, pm_file *stream) { int ret; - if (stream->type == PMT_UNCOMPRESSED) + if (stream == NULL) + return -1; + else if (stream->type == PMT_UNCOMPRESSED) { ret = fread(ptr, 1, bytes, stream->file); } else if (stream->type == PMT_ZIP) { - gzFile gf = stream->param; - int err; - ret = gzread(gf, ptr, bytes); - err = gzerror2(gf); - if (ret > 0 && (err == Z_DATA_ERROR || err == Z_STREAM_END)) - /* we must reset stream pointer or else next seek/read fails */ - gzrewind(gf); + struct zip_file *z = stream->file; + + if (z->entry->compression_method == 0) { + int ret = fread(ptr, 1, bytes, z->zip->fp); + z->pos += ret; + return ret; + } + + z->stream.next_out = ptr; + z->stream.avail_out = bytes; + while (z->stream.avail_out != 0) { + if (z->stream.avail_in == 0) { + z->stream.avail_in = fread(z->inbuf, 1, sizeof(z->inbuf), z->zip->fp); + if (z->stream.avail_in == 0) + break; + z->stream.next_in = z->inbuf; + } + ret = inflate(&z->stream, Z_NO_FLUSH); + if (ret == Z_STREAM_END) + break; + if (ret != Z_OK) { + elprintf(EL_STATUS, "zip: inflate: %d", ret); + return 0; + } + } + z->pos += bytes - z->stream.avail_out; + return bytes - z->stream.avail_out; } else if (stream->type == PMT_CSO) { @@ -293,7 +480,7 @@ size_t pm_read(void *ptr, size_t bytes, pm_file *stream) } cso->block_in_buff = block; } - rret = uncompress2(tmp_dst, 2048, cso->in_buff, read_len); + rret = uncompress_buf(tmp_dst, 2048, cso->in_buff, read_len); if (rret != 0) { elprintf(EL_STATUS, "cso: uncompress failed @ %08x with %i", read_pos, rret); break; @@ -316,27 +503,98 @@ size_t pm_read(void *ptr, size_t bytes, pm_file *stream) index_end = cso->index[block+1]; } } +#if defined(USE_LIBCHDR) + else if (stream->type == PMT_CHD) + { + ret = _pm_read_chd(ptr, bytes, stream, 0); + } 
+#endif else ret = 0; return ret; } +size_t pm_read_audio(void *ptr, size_t bytes, pm_file *stream) +{ + if (stream == NULL) + return -1; +#if !(CPU_IS_LE) + else if (stream->type == PMT_UNCOMPRESSED) + { + // convert little endian audio samples from WAV file + int ret = pm_read(ptr, bytes, stream); + u16 *dst = ptr, v; + u8 *src = ptr; + int i; + + for (i = 0; i < ret; i += 4) { + v = *src++; *dst++ = v | (*src++ << 8); + v = *src++; *dst++ = v | (*src++ << 8); + } + return ret; + } + else +#endif +#if defined(USE_LIBCHDR) + if (stream->type == PMT_CHD) + { + return _pm_read_chd(ptr, bytes, stream, 1); + } +#endif + return pm_read(ptr, bytes, stream); +} + int pm_seek(pm_file *stream, long offset, int whence) { - if (stream->type == PMT_UNCOMPRESSED) + if (stream == NULL) + return -1; + else if (stream->type == PMT_UNCOMPRESSED) { fseek(stream->file, offset, whence); return ftell(stream->file); } else if (stream->type == PMT_ZIP) { - if (PicoMessage != NULL && offset > 6*1024*1024) { - long pos = gztell((gzFile) stream->param); - if (offset < pos || offset - pos > 6*1024*1024) - PicoMessage("Decompressing data..."); + struct zip_file *z = stream->file; + unsigned int pos = z->pos; + int ret; + + switch (whence) + { + case SEEK_CUR: pos += offset; break; + case SEEK_SET: pos = offset; break; + case SEEK_END: pos = stream->size - offset; break; } - return gzseek((gzFile) stream->param, offset, whence); + if (z->entry->compression_method == 0) { + ret = fseek(z->zip->fp, z->start + pos, SEEK_SET); + if (ret == 0) + return (z->pos = pos); + return -1; + } + offset = pos - z->pos; + if (pos < z->pos) { + // full decompress from the start + fseek(z->zip->fp, z->start, SEEK_SET); + z->stream.avail_in = 0; + z->stream.next_in = z->inbuf; + inflateReset(&z->stream); + z->pos = 0; + offset = pos; + } + + if (PicoIn.osdMessage != NULL && offset > 4 * 1024 * 1024) + PicoIn.osdMessage("Decompressing data..."); + + while (offset > 0) { + char buf[16 * 1024]; + size_t l = offset 
> sizeof(buf) ? sizeof(buf) : offset; + ret = pm_read(buf, l, stream); + if (ret != l) + break; + offset -= l; + } + return z->pos; } else if (stream->type == PMT_CSO) { @@ -349,6 +607,19 @@ int pm_seek(pm_file *stream, long offset, int whence) } return cso->fpos_out; } +#if defined(USE_LIBCHDR) + else if (stream->type == PMT_CHD) + { + struct chd_struct *chd = stream->file; + switch (whence) + { + case SEEK_CUR: chd->fpos += offset; break; + case SEEK_SET: chd->fpos = offset; break; + case SEEK_END: chd->fpos = stream->size - offset; break; + } + return chd->fpos; + } +#endif else return -1; } @@ -365,16 +636,24 @@ int pm_close(pm_file *fp) } else if (fp->type == PMT_ZIP) { - ZIP *zipfile = fp->file; - gzclose((gzFile) fp->param); - zipfile->fp = NULL; // gzclose() closed it - closezip(zipfile); + struct zip_file *z = fp->file; + inflateEnd(&z->stream); + closezip(z->zip); } else if (fp->type == PMT_CSO) { free(fp->param); fclose(fp->file); } +#if defined(USE_LIBCHDR) + else if (fp->type == PMT_CHD) + { + struct chd_struct *chd = fp->file; + chd_close(chd->chd); + if (chd->hunk) + free(chd->hunk); + } +#endif else ret = EOF; @@ -385,6 +664,7 @@ int pm_close(pm_file *fp) // byteswap, data needs to be int aligned, src can match dst void Byteswap(void *dst, const void *src, int len) { +#if CPU_IS_LE const unsigned int *ps = src; unsigned int *pd = dst; int i, m; @@ -397,14 +677,15 @@ void Byteswap(void *dst, const void *src, int len) unsigned int t = ps[i]; pd[i] = ((t & m) << 8) | ((t & ~m) >> 8); } +#endif } // Interleve a 16k block and byteswap static int InterleveBlock(unsigned char *dest,unsigned char *src) { int i=0; - for (i=0;i<0x2000;i++) dest[(i<<1) ]=src[ i]; // Odd - for (i=0;i<0x2000;i++) dest[(i<<1)+1]=src[0x2000+i]; // Even + for (i=0;i<0x2000;i++) dest[(i<<1)+MEM_BE2(1)]=src[ i]; // Odd + for (i=0;i<0x2000;i++) dest[(i<<1)+MEM_BE2(0)]=src[0x2000+i]; // Even return 0; } @@ -433,26 +714,22 @@ static unsigned char *PicoCartAlloc(int filesize, int is_sms) 
{ unsigned char *rom; + // make size power of 2 for easier banking handling + int s = 0, tmp = filesize; + while ((tmp >>= 1) != 0) + s++; + if (filesize > (1 << s)) + s++; + rom_alloc_size = 1 << s; + if (is_sms) { - // make size power of 2 for easier banking handling - int s = 0, tmp = filesize; - while ((tmp >>= 1) != 0) - s++; - if (filesize > (1 << s)) - s++; - rom_alloc_size = 1 << s; // be sure we can cover all address space if (rom_alloc_size < 0x10000) rom_alloc_size = 0x10000; } else { - // make alloc size at least sizeof(mcd_state), - // in case we want to switch to CD mode - if (filesize < sizeof(mcd_state)) - filesize = sizeof(mcd_state); - // align to 512K for memhandlers - rom_alloc_size = (filesize + 0x7ffff) & ~0x7ffff; + rom_alloc_size = (rom_alloc_size + 0x7ffff) & ~0x7ffff; } if (rom_alloc_size - filesize < 4) @@ -464,65 +741,75 @@ static unsigned char *PicoCartAlloc(int filesize, int is_sms) return rom; } -int PicoCartLoad(pm_file *f,unsigned char **prom,unsigned int *psize,int is_sms) +int PicoCartLoad(pm_file *f, const unsigned char *rom, unsigned int romsize, + unsigned char **prom, unsigned int *psize, int is_sms) { - unsigned char *rom; + unsigned char *rom_data = NULL; int size, bytes_read; - if (f == NULL) + if (!f && !rom) return 1; - size = f->size; + if (!rom) + size = f->size; + else + size = romsize; + if (size <= 0) return 1; size = (size+3)&~3; // Round up to a multiple of 4 // Allocate space for the rom plus padding - rom = PicoCartAlloc(size, is_sms); - if (rom == NULL) { + rom_data = PicoCartAlloc(size, is_sms); + if (rom_data == NULL) { elprintf(EL_STATUS, "out of memory (wanted %i)", size); return 2; } - if (PicoCartLoadProgressCB != NULL) - { - // read ROM in blocks, just for fun - int ret; - unsigned char *p = rom; - bytes_read=0; - do + if (!rom) { + if (PicoCartLoadProgressCB != NULL) { - int todo = size - bytes_read; - if (todo > 256*1024) todo = 256*1024; - ret = pm_read(p,todo,f); - bytes_read += ret; - p += ret; - 
PicoCartLoadProgressCB(bytes_read * 100 / size); + // read ROM in blocks, just for fun + int ret; + unsigned char *p = rom_data; + bytes_read=0; + do + { + int todo = size - bytes_read; + if (todo > 256*1024) todo = 256*1024; + ret = pm_read(p,todo,f); + bytes_read += ret; + p += ret; + PicoCartLoadProgressCB(bytes_read * 100LL / size); + } + while (ret > 0); + } + else + bytes_read = pm_read(rom_data,size,f); // Load up the rom + + if (bytes_read <= 0) { + elprintf(EL_STATUS, "read failed"); + plat_munmap(rom_data, rom_alloc_size); + return 3; } - while (ret > 0); } else - bytes_read = pm_read(rom,size,f); // Load up the rom - if (bytes_read <= 0) { - elprintf(EL_STATUS, "read failed"); - free(rom); - return 3; - } + memcpy(rom_data, rom, romsize); if (!is_sms) { // maybe we are loading MegaCD BIOS? - if (!(PicoAHW & PAHW_MCD) && size == 0x20000 && (!strncmp((char *)rom+0x124, "BOOT", 4) || - !strncmp((char *)rom+0x128, "BOOT", 4))) { - PicoAHW |= PAHW_MCD; + if (!(PicoIn.AHW & PAHW_MCD) && size == 0x20000 && (!strncmp((char *)rom_data+0x124, "BOOT", 4) || + !strncmp((char *)rom_data+0x128, "BOOT", 4))) { + PicoIn.AHW |= PAHW_MCD; } // Check for SMD: if (size >= 0x4200 && (size&0x3fff) == 0x200 && - ((rom[0x2280] == 'S' && rom[0x280] == 'E') || (rom[0x280] == 'S' && rom[0x2281] == 'E'))) { + ((rom_data[0x2280] == 'S' && rom_data[0x280] == 'E') || (rom_data[0x280] == 'S' && rom_data[0x2281] == 'E'))) { elprintf(EL_STATUS, "SMD format detected."); - DecodeSmd(rom,size); size-=0x200; // Decode and byteswap SMD + DecodeSmd(rom_data,size); size-=0x200; // Decode and byteswap SMD } - else Byteswap(rom, rom, size); // Just byteswap + else Byteswap(rom_data, rom_data, size); // Just byteswap } else { @@ -530,11 +817,11 @@ int PicoCartLoad(pm_file *f,unsigned char **prom,unsigned int *psize,int is_sms) elprintf(EL_STATUS, "SMD format detected."); // at least here it's not interleaved size -= 0x200; - memmove(rom, rom + 0x200, size); + memmove(rom_data, rom_data + 0x200, 
size); } } - if (prom) *prom = rom; + if (prom) *prom = rom_data; if (psize) *psize = size; return 0; @@ -547,14 +834,14 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ // This will hang the emu, but will prevent nasty crashes. // note: 4 bytes are padded to every ROM if (rom != NULL) - *(unsigned long *)(rom+romsize) = 0xFFFE4EFA; // 4EFA FFFE byteswapped + *(u32 *)(rom+romsize) = CPU_BE2(0x4EFAFFFE); Pico.rom=rom; Pico.romsize=romsize; - if (SRam.data) { - free(SRam.data); - SRam.data = NULL; + if (Pico.sv.data) { + free(Pico.sv.data); + Pico.sv.data = NULL; } if (PicoCartUnloadHook != NULL) { @@ -563,7 +850,7 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ } pdb_cleanup(); - PicoAHW &= PAHW_MCD|PAHW_SMS; + PicoIn.AHW &= ~(PAHW_32X|PAHW_SVP); PicoCartMemSetup = NULL; PicoDmaHook = NULL; @@ -572,13 +859,19 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ PicoLoadStateHook = NULL; carthw_chunks = NULL; - if (!(PicoAHW & (PAHW_MCD|PAHW_SMS))) + if (!(PicoIn.AHW & (PAHW_MCD|PAHW_SMS|PAHW_PICO))) PicoCartDetect(carthw_cfg); + if (PicoIn.AHW & PAHW_SMS) + PicoCartDetectMS(); + if (PicoIn.AHW & PAHW_SVP) + PicoSVPStartup(); + if (PicoIn.AHW & PAHW_PICO) + PicoInitPico(); // setup correct memory map for loaded ROM - switch (PicoAHW) { + switch (PicoIn.AHW & ~(PAHW_GG|PAHW_SG|PAHW_SC)) { default: - elprintf(EL_STATUS|EL_ANOMALY, "starting in unknown hw configuration: %x", PicoAHW); + elprintf(EL_STATUS|EL_ANOMALY, "starting in unknown hw configuration: %x", PicoIn.AHW); case 0: case PAHW_SVP: PicoMemSetup(); break; case PAHW_MCD: PicoMemSetupCD(); break; @@ -589,7 +882,7 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ if (PicoCartMemSetup != NULL) PicoCartMemSetup(); - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) PicoPowerMS(); else PicoPower(); @@ -616,8 +909,7 @@ void PicoCartUnload(void) PicoCartUnloadHook = NULL; } - if 
(PicoAHW & PAHW_32X) - PicoUnload32x(); + PicoUnload32x(); if (Pico.rom != NULL) { SekFinishIdleDet(); @@ -627,15 +919,16 @@ void PicoCartUnload(void) PicoGameLoaded = 0; } -static unsigned int rom_crc32(void) +static unsigned int rom_crc32(int size) { unsigned int crc; elprintf(EL_STATUS, "caclulating CRC32.."); + if (size <= 0 || size > Pico.romsize) size = Pico.romsize; // have to unbyteswap for calculation.. - Byteswap(Pico.rom, Pico.rom, Pico.romsize); - crc = crc32(0, Pico.rom, Pico.romsize); - Byteswap(Pico.rom, Pico.rom, Pico.romsize); + Byteswap(Pico.rom, Pico.rom, size); + crc = crc32(0, Pico.rom, size); + Byteswap(Pico.rom, Pico.rom, size); return crc; } @@ -646,7 +939,7 @@ static int rom_strcmp(int rom_offset, const char *s1) if (rom_offset + len > Pico.romsize) return 0; for (i = 0; i < len; i++) - if (s1[i] != s_rom[(i + rom_offset) ^ 1]) + if (s1[i] != s_rom[MEM_BE2(i + rom_offset)]) return 1; return 0; } @@ -713,7 +1006,8 @@ static int is_expr(const char *expr, char **pr) #include "carthw_cfg.c" -static void parse_carthw(const char *carthw_cfg, int *fill_sram) +static void parse_carthw(const char *carthw_cfg, int *fill_sram, + int *hw_detected) { int line = 0, any_checks_passed = 0, skip_sect = 0; const char *s, *builtin = builtin_carthw_cfg; @@ -825,7 +1119,7 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) goto bad; if (rom_crc == 0) - rom_crc = rom_crc32(); + rom_crc = rom_crc32(64*1024); if (crc == rom_crc) any_checks_passed = 1; else @@ -837,12 +1131,13 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) if (is_expr("hw", &p)) { if (!any_checks_passed) goto no_checks; + *hw_detected = 1; rstrip(p); if (strcmp(p, "svp") == 0) - PicoSVPStartup(); + PicoIn.AHW = PAHW_SVP; else if (strcmp(p, "pico") == 0) - PicoInitPico(); + PicoIn.AHW = PAHW_PICO; else if (strcmp(p, "prot") == 0) carthw_sprot_startup(); else if (strcmp(p, "ssf2_mapper") == 0) @@ -855,11 +1150,20 @@ static void parse_carthw(const char *carthw_cfg, 
int *fill_sram) carthw_radica_startup(); else if (strcmp(p, "piersolar_mapper") == 0) carthw_pier_startup(); - else if (strcmp(p, "prot_lk3") == 0) - carthw_prot_lk3_startup(); + else if (strcmp(p, "sf001_mapper") == 0) + carthw_sf001_startup(); + else if (strcmp(p, "sf002_mapper") == 0) + carthw_sf002_startup(); + else if (strcmp(p, "sf004_mapper") == 0) + carthw_sf004_startup(); + else if (strcmp(p, "lk3_mapper") == 0) + carthw_lk3_startup(); + else if (strcmp(p, "smw64_mapper") == 0) + carthw_smw64_startup(); else { elprintf(EL_STATUS, "carthw:%d: unsupported mapper: %s", line, p); skip_sect = 1; + *hw_detected = 0; } continue; } @@ -884,8 +1188,8 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) elprintf(EL_STATUS, "carthw:%d: bad sram_range: %08x - %08x", line, start, end); goto bad_nomsg; } - SRam.start = start; - SRam.end = end; + Pico.sv.start = start; + Pico.sv.end = end; continue; } else if (is_expr("prop", &p)) { @@ -894,13 +1198,21 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) rstrip(p); if (strcmp(p, "no_sram") == 0) - SRam.flags &= ~SRF_ENABLED; + Pico.sv.flags &= ~SRF_ENABLED; else if (strcmp(p, "no_eeprom") == 0) - SRam.flags &= ~SRF_EEPROM; + Pico.sv.flags &= ~SRF_EEPROM; else if (strcmp(p, "filled_sram") == 0) *fill_sram = 1; + else if (strcmp(p, "wwfraw_hack") == 0) + PicoIn.quirks |= PQUIRK_WWFRAW_HACK; + else if (strcmp(p, "blackthorne_hack") == 0) + PicoIn.quirks |= PQUIRK_BLACKTHORNE_HACK; + else if (strcmp(p, "marscheck_hack") == 0) + PicoIn.quirks |= PQUIRK_MARSCHECK_HACK; else if (strcmp(p, "force_6btn") == 0) - PicoQuirks |= PQUIRK_FORCE_6BTN; + PicoIn.quirks |= PQUIRK_FORCE_6BTN; + else if (strcmp(p, "no_z80_bus_lock") == 0) + PicoIn.quirks |= PQUIRK_NO_Z80_BUS_LOCK; else { elprintf(EL_STATUS, "carthw:%d: unsupported prop: %s", line, p); goto bad_nomsg; @@ -917,8 +1229,8 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) type = strtoul(p, &r, 0); if (r == p || type < 0) goto bad; - 
SRam.eeprom_type = type; - SRam.flags |= SRF_EEPROM; + Pico.sv.eeprom_type = type; + Pico.sv.flags |= SRF_EEPROM; continue; } else if (is_expr("eeprom_lines", &p)) { @@ -933,9 +1245,9 @@ static void parse_carthw(const char *carthw_cfg, int *fill_sram) sda_out < 0 || sda_out > 15) goto bad; - SRam.eeprom_bit_cl = scl; - SRam.eeprom_bit_in = sda_in; - SRam.eeprom_bit_out= sda_out; + Pico.sv.eeprom_bit_cl = scl; + Pico.sv.eeprom_bit_in = sda_in; + Pico.sv.eeprom_bit_out= sda_out; continue; } else if ((tmp = is_expr("prot_ro_value16", &p)) || is_expr("prot_rw_value16", &p)) { @@ -973,61 +1285,114 @@ no_checks: */ static void PicoCartDetect(const char *carthw_cfg) { + int carthw_detected = 0; int fill_sram = 0; - memset(&SRam, 0, sizeof(SRam)); - if (Pico.rom[0x1B1] == 'R' && Pico.rom[0x1B0] == 'A') + memset(&Pico.sv, 0, sizeof(Pico.sv)); + if (Pico.rom[MEM_BE2(0x1B0)] == 'R' && Pico.rom[MEM_BE2(0x1B1)] == 'A') { - SRam.start = rom_read32(0x1B4) & ~0xff000001; // align - SRam.end = (rom_read32(0x1B8) & ~0xff000000) | 1; - if (Pico.rom[0x1B2] & 0x40) + Pico.sv.start = rom_read32(0x1B4) & ~0xff000001; // align + Pico.sv.end = (rom_read32(0x1B8) & ~0xff000000) | 1; + if (Pico.rom[MEM_BE2(0x1B3)] & 0x40) // EEPROM - SRam.flags |= SRF_EEPROM; - SRam.flags |= SRF_ENABLED; + Pico.sv.flags |= SRF_EEPROM; + Pico.sv.flags |= SRF_ENABLED; } - if (SRam.end == 0 || SRam.start > SRam.end) + if (Pico.sv.end == 0 || Pico.sv.start > Pico.sv.end) { // some games may have bad headers, like S&K and Sonic3 // note: majority games use 0x200000 as starting address, but there are some which // use something else (0x300000 by HardBall '95). Luckily they have good headers. 
- SRam.start = 0x200000; - SRam.end = 0x203FFF; - SRam.flags |= SRF_ENABLED; + Pico.sv.start = 0x200000; + Pico.sv.end = 0x203FFF; + Pico.sv.flags |= SRF_ENABLED; } // set EEPROM defaults, in case it gets detected - SRam.eeprom_type = 0; // 7bit (24C01) - SRam.eeprom_bit_cl = 1; - SRam.eeprom_bit_in = 0; - SRam.eeprom_bit_out= 0; + Pico.sv.eeprom_type = 0; // 7bit (24C01) + Pico.sv.eeprom_bit_cl = 1; + Pico.sv.eeprom_bit_in = 0; + Pico.sv.eeprom_bit_out= 0; if (carthw_cfg != NULL) - parse_carthw(carthw_cfg, &fill_sram); + parse_carthw(carthw_cfg, &fill_sram, &carthw_detected); - if (SRam.flags & SRF_ENABLED) + // assume the standard mapper for large roms + if (!carthw_detected && Pico.romsize > 0x400000) + carthw_ssf2_startup(); + + if (Pico.sv.flags & SRF_ENABLED) { - if (SRam.flags & SRF_EEPROM) - SRam.size = 0x2000; + if (Pico.sv.flags & SRF_EEPROM) + Pico.sv.size = 0x2000; else - SRam.size = SRam.end - SRam.start + 1; + Pico.sv.size = Pico.sv.end - Pico.sv.start + 1; - SRam.data = calloc(SRam.size, 1); - if (SRam.data == NULL) - SRam.flags &= ~SRF_ENABLED; + Pico.sv.data = calloc(Pico.sv.size, 1); + if (Pico.sv.data == NULL) + Pico.sv.flags &= ~SRF_ENABLED; - if (SRam.eeprom_type == 1) // 1 == 0 in PD EEPROM code - SRam.eeprom_type = 0; + if (Pico.sv.eeprom_type == 1) // 1 == 0 in PD EEPROM code + Pico.sv.eeprom_type = 0; } - if ((SRam.flags & SRF_ENABLED) && fill_sram) + if ((Pico.sv.flags & SRF_ENABLED) && fill_sram) { elprintf(EL_STATUS, "SRAM fill"); - memset(SRam.data, 0xff, SRam.size); + memset(Pico.sv.data, 0xff, Pico.sv.size); } - // Unusual region 'code' - if (rom_strcmp(0x1f0, "EUROPE") == 0 || rom_strcmp(0x1f0, "Europe") == 0) - *(int *) (Pico.rom + 0x1f0) = 0x20204520; + // tweak for Blackthorne: master SH2 overwrites stack of slave SH2 being in PWM + // interrupt. On real hardware, nothing happens since slave fetches the values + // it has written from its cache, but picodrive doesn't emulate caching. 
+ // move master memory area down by 0x100 bytes. + // XXX replace this abominable hack. It might cause other problems in the game! + if (PicoIn.quirks & PQUIRK_BLACKTHORNE_HACK) { + int i; + unsigned a = 0; + for (i = 0; i < Pico.romsize; i += 4) { + unsigned v = CPU_BE2(*(u32 *) (Pico.rom + i)); + if (a && v == a + 0x400) { // patch if 2 pointers with offset 0x400 are found + elprintf(EL_STATUS, "auto-patching @%06x: %08x->%08x\n", i, v, v - 0x100); + *(u32 *) (Pico.rom + i) = CPU_BE2(v - 0x100); + } + // detect a pointer into the incriminating area + a = 0; + if (v >> 12 == 0x0603f000 >> 12 && !(v & 3)) + a = v; + } + } + + // tweak for Mars Check Program: copies 32K longwords (128KB) from a 64KB buffer + // in ROM or DRAM to SDRAM with DMA in 4-longword mode, overwriting an SDRAM comm + // area in turn. This crashes the test on emulators without CPU cache emulation. + // This may be a bug in Mars Check, since it's only checking for the 64KB result. + // Patch the DMA transfers so that they transfer only 64KB. + if (PicoIn.quirks & PQUIRK_MARSCHECK_HACK) { + int i; + unsigned a = 0; + for (i = 0; i < Pico.romsize; i += 4) { + unsigned v = CPU_BE2(*(u32 *) (Pico.rom + i)); + if (a == 0xffffff8c && v == 0x5ee1) { // patch if 4-long xfer written to CHCR + elprintf(EL_STATUS, "auto-patching @%06x: %08x->%08x\n", i, v, v & ~0x800); + *(u32 *) (Pico.rom + i) = CPU_BE2(v & ~0x800); // change to half-sized xfer + } + a = v; + } + } } +static void PicoCartDetectMS(void) +{ + memset(&Pico.sv, 0, sizeof(Pico.sv)); + + // Always map SRAM, since there's no indicator in ROM if it's needed or not + // TODO: this should somehow be coming from a cart database! 
+ + Pico.sv.size = 0x8000; // Sega mapper, 2 banks of 16 KB each + Pico.sv.flags |= SRF_ENABLED; + Pico.sv.data = calloc(Pico.sv.size, 1); + if (Pico.sv.data == NULL) + Pico.sv.flags &= ~SRF_ENABLED; +} // vim:shiftwidth=2:expandtab diff --git a/pico/carthw.cfg b/pico/carthw.cfg index 72d4a9a1..5932dd7c 100644 --- a/pico/carthw.cfg +++ b/pico/carthw.cfg @@ -1,14 +1,14 @@ # hardware (hw = ...): # svp - Sega Virtua Processor # pico - Sega Pico (not really cart hw, but convenient to support here) -# prot - siple copy protection devices in unlicensed cartridges (see prot. below) -# prot_lk3 - Lion King 3 / Super King Kong 99 protection. +# prot - simple copy protection devices in unlicensed cartridges (see prot. below) # # cartridge properties (prop = ...): -# no_sram - don't emulate sram/EEPROM even if ROM headers tell it's there -# no_eeprom - save storage is not EEPROM, even if ROM headers tell it is -# filled_sram - save storage needs to be initialized with FFh instead of 00h -# force_6btn - game only supports 6 button pad (32X X-men proto) +# no_sram - don't emulate sram/EEPROM even if ROM headers tell it's there +# no_eeprom - save storage is not EEPROM, even if ROM headers tell it is +# filled_sram - save storage needs to be initialized with FFh instead of 00h +# force_6btn - game only supports 6 button pad (32X X-men proto) +# no_z80_bus_lock - don't emulate z80 bus getting closed to the 68k when bus is released # # mappers (hw = ...): # ssf2_mapper - used in Super Street Fighter2 @@ -16,6 +16,9 @@ # realtec_mapper # radica_mapper - similar to x_in_1_mapper # piersolar_mapper - used in Pier Solar +# sf00x_mapper - versions x=1,2,4 used by superfighter team +# lk3_mapper - mapper for Lion King 3 / Super King Kong 99 and some more +# smw64_mapper - mapper for Super Mario World 64 # # save storage memory range (inclusive, overrides ROM header): # sram_range = @@ -41,14 +44,39 @@ check_str = 0x150, "VIRTUA RACING" check_str = 0x810, "OHMP" hw = svp -[Pico] 
-check_str = 0x100, "SEGA PICO" +[Soreike! Anpanman no Game de Asobou Anpanman - Pico] +check_str = 0x100, "SEGA IAC " hw = pico -[Pico] -check_str = 0x100, "IMA IKUNOUJYUKU" +# Unou Kaihatsu Series: IMA IKUNO[U]JYUKU +[Unou Kaihatsu Series - Pico] +check_str = 0x100, "IMA IKUNO" hw = pico +# X-Men proto +[X-Men (prototype) - 32X] +check_str = 0x120, "32X SAMPLE PROGRAM" +check_str = 0x32b74c, "Bishop Level" +prop = force_6btn + +# WWF Raw +[WWF Raw - 32X] +check_str = 0x100, "SEGA 32X" +check_str = 0x150, "WWF RAW" +prop = wwfraw_hack # reads back data written to high ROM adresses from cache + +# Blackthorne +[Blackthorne - 32X] +check_str = 0x100, "SEGA 32X" +check_str = 0x120, "BLACKTHORNE" +prop = blackthorne_hack # reads back data overwritten by 2nd CPU from cache + +# Mars check program +[Mars Check - 32X] +check_str = 0x100, "SEGA" +check_str = 0x150, "MARS CHECK PROGRAM" +prop = marscheck_hack # reads back data overwritten by DMA from cache + # sram emulation triggers some protection for this one [Puggsy] check_str = 0x120, "PUGGSY" @@ -63,13 +91,16 @@ prop = filled_sram check_str = 0x150, "MICRO MACHINES II" prop = filled_sram -# X-Men proto -[X-Men (prototype)] -check_str = 0x150, "32X SAMPLE PROGRAM" -check_str = 0x32b74c, "Bishop Level" -prop = force_6btn +# bad headers +[HardBall III] +check_str = 0x150, " HardBall III" +sram_range = 0x200000,0x20ffff # The SSF2 mapper +[Mega Everdrive] +check_str = 0x100, "SEGA SSF" +hw = ssf2_mapper + [Super Street Fighter II - The New Challengers (U)] check_str = 0x150, "SUPER STREET FIGHTER2 The New Challengers" hw = ssf2_mapper @@ -81,6 +112,23 @@ check_str = 0x150, "PIER" check_str = 0x610, "Respect" hw = piersolar_mapper +# Beggar Prince, unusual SRAM location +[Beggar Prince] +check_str = 0x150, "BEGGAR PRINCE" +hw = sf001_mapper +sram_range = 0x400000,0x40ffff +prop = filled_sram + +[Legend of Wukong] +check_str = 0x150, "LEGEND OF WUKONG" +hw = sf002_mapper + +# Star Odyssey, r/w in SRAM mirror (double SRAM 
as a kludge) +[Star Odyssey] +check_str = 0x150, "STAR ODYSSEY" +hw = sf004_mapper +sram_range = 0x200000,0x207fff + # detect *_in_1 based on first game and if it's larger than it should be, # as some dumps look like to be incomplete. # This will also pick overdumps, but those should still work with the mapper applied. @@ -99,6 +147,16 @@ check_str = 0x150, "ALIEN 3" check_size_gt = 0x080000 hw = x_in_1_mapper +[5-in-1 Megadrive Portable] +check_str = 0x150, "TINY TOON ADVENTURES" +check_size_gt = 0x080000 +hw = x_in_1_mapper + +[40-games-in-1] +check_str = 0x160, "FS MOONWALKER" +check_size_gt = 0x080000 +hw = x_in_1_mapper + [Super 15-in-1] check_str = 0x150, " SHOVE IT!" check_size_gt = 0x020000 @@ -110,11 +168,16 @@ check_size_gt = 0x020000 hw = x_in_1_mapper # radica -[radica_v1] +[Arcade Legends Sega] check_str = 0x150, "KID CHAMELEON" check_size_gt = 0x100000 hw = radica_mapper +[Arcade Legends Capcom] +check_str = 0x150, "STREET FIGHTER" +check_size_gt = 0x300000 +hw = radica_mapper + # realtec [Earth Defend, The (Unl)] check_str = 0x94, "THE EARTH DEFEND" @@ -139,6 +202,12 @@ check_str = 0x150, "FRANK THOMAS BIGHURT BASEBAL" eeprom_type = 3 eeprom_lines = 8,0,0 +[Greatest Heavyweights] +check_str = 0x150, "GREATEST HEAVYWEIGHTS" +sram_range = 0x200000,0x200001 +eeprom_type = 1 +eeprom_lines = 1,0,0 + [MICRO MACHINES II] check_str = 0x150, "MICRO MACHINES II" sram_range = 0x300000,0x380001 @@ -207,74 +276,224 @@ sram_range = 0x200000,0x200001 eeprom_type = 1 eeprom_lines = 6,7,7 +[Sports Talk Baseball] +check_str = 0x150, "MLBPA SPORTS TALK BASEBALL" +sram_range = 0x200000,0x200001 +eeprom_type = 1 +eeprom_lines = 1,0,0 + # Unlicensed games with simple protections # some of these come from Haze, some from myself (notaz). 
+# more added by irixxxx from Mame and G+GX +# check_crc32 calculation for 1st 64 KB only to allow for overdumps + +# lk3, mapper + bitswapping hardware +[Lion King 3 (Unl)] +check_str = 0x104, " are Registered Trademarks" +check_crc32 = 0xc9706e25 +hw = lk3_mapper + +[Super King Kong 99 (Unl)] +check_str = 0x104, " are Registered Trademarks" +check_crc32 = 0x4c98cc30 +hw = lk3_mapper + +[Gunfight 3-in-1 (Unl)] +check_str = 0x104, " are Registered Trademarks" +check_crc32 = 0x6ec032cb +hw = lk3_mapper + +[Pocket Monsters II (Unl)] +check_str = 0x104, " " +check_crc32 = 0x0d097f5c +hw = lk3_mapper + +[Pokemon Stadium (Unl)] +check_str = 0x104, " " +check_crc32 = 0xbf7219df +hw = lk3_mapper + +[Mulan (Unl)] +check_str = 0x104, " " +check_crc32 = 0xb5b7606e +hw = lk3_mapper + +[Final Samurai V (Unl)] # aka Soul Edge +check_str = 0x104, " " +check_crc32 = 0xab3ae5e9 +hw = lk3_mapper + +[Top Fighter 2000 (Unl)] +check_str = 0x104, " " +check_crc32 = 0x802f53f9 +hw = lk3_mapper + +# smw64 mapper + prot +[Super Mario World 64 (Unl)] +check_csum = 0 +check_crc32 = 0xf63b7bdc +hw = smw64_mapper + +# cart I/O area [Bug's Life, A (Unl)] check_str = 0x104, " " -check_crc32 = 0x10458e09 +check_crc32 = 0x50aa5a9b hw = prot prot_ro_value16 = 0xa13000,0xffff00,0x28 -[Elf Wor (Unl)] -check_str = 0x172, "GAME : ELF WOR" +[Rockman X3 (Unl)] +check_csum = 0 +check_crc32 = 0xee20be2c hw = prot -prot_ro_value16 = 0x400000,-2,0x5500 -prot_ro_value16 = 0x400002,-2,0xc900 # check is done if the above one fails -prot_ro_value16 = 0x400004,-2,0x0f00 -prot_ro_value16 = 0x400006,-2,0x1800 # similar to above +prot_ro_value16 = 0xa13000,-2,0x0c -[King of Fighters '98, The (Unl)] +[Super Mario World (Unl)] +check_str = 0x104, "SEGASEGASEGA" +check_crc32 = 0xc3616596 +hw = prot +prot_ro_value16 = 0xa13000,-2,0x1c + +[Super Mario Bros. 
2 (Unl)] # aka Super Mario 2 1998 +check_str = 0x104, " are Registered Trademarks" +check_crc32 = 0x7861fb28 +hw = prot +prot_ro_value16 = 0xa13000,-2,0x0a + +[Pocket Monsters (Unl)] check_str = 0x104, " " -check_crc32 = 0xcbc38eea +check_crc32 = 0xf4cb9b37 +hw = prot +prot_ro_value16 = 0xa13000,-2,0x00 +prot_ro_value16 = 0xa13002,-2,0x01 +prot_ro_value16 = 0xa1303e,-2,0x1f + +[King of Fighters '99, The (Unl)] +check_str = 0x104, " " +check_crc32 = 0x7bdfb390 +hw = prot +prot_ro_value16 = 0xa13000,-2,0x00 +prot_ro_value16 = 0xa13002,-2,0x01 +prot_ro_value16 = 0xa1303e,-2,0x1f + +# cart upper 4MB +[King of Fighters '98+2000, The (Unl)] +check_str = 0x104, " " +check_crc32 = 0x8fb8b29e hw = prot prot_ro_value16 = 0x480000,0xff0000,0xaa00 prot_ro_value16 = 0x4a0000,0xff0000,0x0a00 prot_ro_value16 = 0x4c0000,0xff0000,0xf000 prot_ro_value16 = 0x400000,0xc00000,0x0000 # default for 400000-7f0000 -[Lion King 3 (Unl)] -check_str = 0x104, " are Registered Trademarks" -check_crc32 = 0xc004219d -hw = prot_lk3 - -[Lion King II, The (Unl)] -check_str = 0x104, " are Registered Trademarks" -check_crc32 = 0xaff46765 -hw = prot -prot_rw_value16 = 0x400000,0xc00004,0 -prot_rw_value16 = 0x400004,0xc00004,0 - -[Mahjong Lover (Unl)] -check_str = 0x118, "CREATON. " -check_crc32 = 0xddd02ba4 +[Mahjong Lover (Unl), Super Majon Club (Unl), Insane Paradise (Unl)] +# Majiang qingren, Chaoji majiang Club, Fengkuang taohuayuan (Crazy Utopia) +check_str = 0x104, " MEGA DRIVE (C)" +check_str = 0x118, "CREATON." 
+check_str = 0x180, "MDGM-000" hw = prot prot_ro_value16 = 0x400000,-2,0x9000 prot_ro_value16 = 0x401000,-2,0xd300 -[Pocket Monsters (Unl)] -check_str = 0x104, " " -check_crc32 = 0xf68f6367 +[Rook Mann (Unl)] # aka Rock Heaven +check_csum = 0x6cca +check_crc32 = 0xab5d5d9e hw = prot -prot_ro_value16 = 0xa13002,-2,0x01 -prot_ro_value16 = 0xa1303e,-2,0x1f +prot_ro_value16 = 0x500008,-2,0x5000 -[Pocket Monsters (Unl) [a1]] -check_str = 0x104, " " -check_crc32 = 0xfb176667 +[Rock World (Unl)] +check_str = 0x113, "KANKO 91-92" +check_crc32 = 0x79423515 hw = prot -prot_ro_value16 = 0xa13000,-2,0x14 -prot_ro_value16 = 0xa13002,-2,0x01 -prot_ro_value16 = 0xa1303e,-2,0x1f +prot_ro_value16 = 0x500008,-2,0x5000 +prot_ro_value16 = 0x500208,-2,0xa000 -[Rockman X3 (Unl)] +[Lion King II, The (Unl)] +check_str = 0x104, " are Registered Trademarks" +check_crc32 = 0x7009cac3 +hw = prot +prot_rw_value16 = 0x400000,0xc00004,0 +prot_rw_value16 = 0x400004,0xc00004,0 + +[Squirrel King (Unl)] +check_str = 0x104, " are Registered Trademarks" +check_crc32 = 0x1c602dd4 +hw = prot +prot_rw_value16 = 0x400000,0xc00000,0 +prot_rw_value16 = 0x400004,0xc00004,0 + +[Tiny Toon Adventures 3 (Unl)] +check_str = 0x104, " are Registered Trademarks" +check_crc32 = 0xc31cfcca +hw = prot +prot_rw_value16 = 0x400000,0xc00000,0 +prot_rw_value16 = 0x400004,0xc00004,0 + +[Barver Battle Saga (Unl)] # Taikong zhanshi +check_csum = 0x30b9 +check_crc32 = 0x35e0ff17 +hw = prot +prot_rw_value16 = 0x400000,0xc00000,0 +prot_rw_value16 = 0x400004,0xc00004,0 + +[Final Fantasy (Unl)] # Taikong zhanshi (russian bootleg) +check_csum = 0x5ff9 +check_crc32 = 0x4b2b163a +hw = prot +prot_rw_value16 = 0x400000,0xc00000,0 +prot_rw_value16 = 0x400004,0xc00004,0 + +[Water Margin (Unl)] # Shuihu Zhuan +check_csum = 0x6001 +check_crc32 = 0xfa80956a +hw = prot +prot_rw_value16 = 0x400000,0xc00000,0 +prot_rw_value16 = 0x400004,0xc00004,0 + +[Legend of Fengshen Yingjie, The (Unl)] # Fengshen yingjie chuan (Canon) +check_csum = 0xffff 
+check_crc32 = 0x91865ea4 +hw = prot +prot_rw_value16 = 0x400000,0xc00000,0 +prot_rw_value16 = 0x400004,0xc00004,0 + +[Legend of Arthur, The (Unl)] # Ya se chuanshuo +check_csum = 0xffff +check_crc32 = 0x8e83dbfa +hw = prot +prot_ro_value16 = 0x400000,-2,0x6300 +prot_ro_value16 = 0x400002,-2,0x9800 +prot_ro_value16 = 0x400004,-2,0xc900 +prot_ro_value16 = 0x400006,-2,0x1800 + +[Wucom Legend (Unl)] # Wukong waizhuan +check_str = 0x104, " are Registered Trademarks" +check_crc32 = 0xf838aa3b +hw = prot +prot_ro_value16 = 0x400000,-2,0x6300 +prot_ro_value16 = 0x400002,-2,0x9800 +prot_ro_value16 = 0x400004,-2,0xc900 +prot_ro_value16 = 0x400006,-2,0x1800 + +[Super Magician (Unl)] # Ling huan daoshi +check_str = 0x172, "GAME : ELF WOR" +hw = prot +prot_ro_value16 = 0x400000,-2,0x5500 +prot_ro_value16 = 0x400002,-2,0x0f00 # check is done if the above one fails +prot_ro_value16 = 0x400004,-2,0xc900 +prot_ro_value16 = 0x400006,-2,0x1800 # similar to above + +[Mighty Morphin Power Rangers (Unl)] +check_str = 0x104, " " +check_crc32 = 0x5fdeb37b +hw = prot +prot_ro_value16 = 0x400000,-2,0x5500 +prot_ro_value16 = 0x400002,-2,0x0f00 +prot_ro_value16 = 0x400004,-2,0xc900 +prot_ro_value16 = 0x400006,-2,0x1800 + +[Smart Mouse (Unl)] # Huanle taoqi shu check_csum = 0 -check_crc32 = 0x3ee639f0 -hw = prot -prot_ro_value16 = 0xa13000,-2,0x0c - -[Smart Mouse (Unl)] -check_csum = 0 -check_crc32 = 0xdecdf740 +check_crc32 = 0xc9539fce hw = prot prot_ro_value16 = 0x400000,-2,0x5500 prot_ro_value16 = 0x400002,-2,0x0f00 @@ -283,38 +502,74 @@ prot_ro_value16 = 0x400006,-2,0xf000 [Soul Blade (Unl)] check_str = 0x104, " " -check_crc32 = 0xf26f88d1 +check_crc32 = 0x6a95f766 hw = prot +prot_ro_value16 = 0x400000,-2,0x6300 prot_ro_value16 = 0x400002,-2,0x9800 prot_ro_value16 = 0x400004,-2,0xaa00 # or 0xc900 prot_ro_value16 = 0x400006,-2,0xf000 [Super Bubble Bobble (Unl)] check_str = 0x104, " are Registered Trademarks" -check_crc32 = 0x4820a161 +check_crc32 = 0xf93f3d0b hw = prot prot_ro_value16 = 
0x400000,-2,0x5500 prot_ro_value16 = 0x400002,-2,0x0f00 -[Super King Kong 99 (Unl)] +[Battle of Red Cliffs, The (Unl)] # Sanguo yanyi (Romance of the 3 Kingdoms) check_str = 0x104, " are Registered Trademarks" -check_crc32 = 0x413dfee2 -hw = prot_lk3 - -[Super Mario Bros. (Unl)] -check_str = 0x140, "SUPER MARIO BROS " +check_crc32 = 0x66165305 hw = prot -prot_ro_value16 = 0xa13000,-2,0x0c +prot_ro_value16 = 0x400000,-2,0x5500 +prot_ro_value16 = 0x400002,-2,0x0f00 +prot_ro_value16 = 0x400004,-2,0xaa00 +prot_ro_value16 = 0x400006,-2,0xf000 -[Super Mario 2 1998 (Unl)] -check_str = 0x104, " are Registered Trademarks" -check_crc32 = 0xf7e1b3e1 +[Tunderbolt II (Unl)] # Leidian +check_str = 0xfe, "WISEGAME" +check_crc32 = 0x6f01bd65 hw = prot -prot_ro_value16 = 0xa13000,-2,0x0a +prot_ro_value16 = 0x400000,-2,0x5500 +prot_ro_value16 = 0x400002,-2,0x0f00 +prot_ro_value16 = 0x400004,-2,0xaa00 +prot_ro_value16 = 0x400006,-2,0xf000 -[Squirrel King (R)] -check_str = 0x104, " are Registered Trademarks" -check_crc32 = 0xb8261ff5 +[16 Tiles Mahjong 1+2 (Unl)] # Zhang majiang +check_str = 0xfe, "WISEGAME IS TRADE MARKER" hw = prot -prot_rw_value16 = 0x400000,0xc00000,0 +prot_ro_value16 = 0x400002,-2,0xaa00 +prot_ro_value16 = 0x400004,-2,0xc900 +prot_ro_value16 = 0x400006,-2,0xf000 +[Super Poker (Unl)] # Chaoji puke +check_csum = 0xffff +check_crc32 = 0xdd02797c +hw = prot +prot_ro_value16 = 0x400000,-2,0x5500 +prot_ro_value16 = 0x400002,-2,0x0f00 +prot_ro_value16 = 0x400004,-2,0xaa00 +prot_ro_value16 = 0x400006,-2,0xf000 + +[777 Casino (Unl)] # Menghuan shuiguo pan +check_csum = 0 +check_crc32 = 0xee9fc429 +hw = prot +prot_ro_value16 = 0x400000,-2,0x6300 + +# Unlicensed homebrew games made by V.M.V. 
+# to prevent bus conflicts between the audio drivers in 68k and Z80 + +[Ben 10 (Unl)] +check_str = 0x180, "GM 00000000-00" +check_crc32 = 0x6732aab4 +prop = no_z80_bus_lock + +[Mario 3: Vokrug Sveta (Unl)] +check_str = 0x180, "GM 00000000-00" +check_crc32 = 0xe302585a +prop = no_z80_bus_lock + +[Mario 4: Kosmicheskaya Odisseya (Unl)] +check_csum = 8224 +check_crc32 = 0x20ed0de8 +prop = no_z80_bus_lock diff --git a/pico/carthw/carthw.c b/pico/carthw/carthw.c index 06936fbf..ccafe726 100644 --- a/pico/carthw/carthw.c +++ b/pico/carthw/carthw.c @@ -1,6 +1,7 @@ /* * Support for a few cart mappers and some protection. * (C) notaz, 2008-2011 + * (C) irixxxx, 2021-2022 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -8,67 +9,98 @@ #include "../pico_int.h" #include "../memory.h" +#include "eeprom_spi.h" -/* The SSFII mapper */ -static unsigned char ssf2_banks[8]; +static int have_bank(u32 base) +{ + // the loader allocs in 512K quantities + if (base >= Pico.romsize) { + elprintf(EL_ANOMALY|EL_STATUS, "carthw: missing bank @ %06x", base); + return 0; + } + return 1; +} + +/* standard/ssf2 mapper */ +int carthw_ssf2_active; +unsigned char carthw_ssf2_banks[8]; static carthw_state_chunk carthw_ssf2_state[] = { - { CHUNK_CARTHW, sizeof(ssf2_banks), &ssf2_banks }, - { 0, 0, NULL } + { CHUNK_CARTHW, sizeof(carthw_ssf2_banks), &carthw_ssf2_banks }, + { 0, 0, NULL } }; -static void carthw_ssf2_write8(u32 a, u32 d) +void carthw_ssf2_write8(u32 a, u32 d) { - u32 target, base; + u32 target, base; - if ((a & 0xfffff0) != 0xa130f0) { - PicoWrite8_io(a, d); - return; - } + if ((a & ~0x0e) != 0xa130f1 || a == 0xa130f1) { + PicoWrite8_io(a, d); + return; + } - a &= 0x0e; - if (a == 0) - return; + a &= 0x0e; + if (a == 0) + return; + if (carthw_ssf2_banks[a >> 1] == d) + return; - ssf2_banks[a >> 1] = d; - base = d << 19; - target = a << 18; - if (base + 0x80000 > Pico.romsize) { - elprintf(EL_ANOMALY|EL_STATUS, "ssf2: missing 
bank @ %06x", base); - return; - } + base = d << 19; + target = a << 18; + if (!have_bank(base)) + return; + carthw_ssf2_banks[a >> 1] = d; - cpu68k_map_set(m68k_read8_map, target, target + 0x80000 - 1, Pico.rom + base, 0); - cpu68k_map_set(m68k_read16_map, target, target + 0x80000 - 1, Pico.rom + base, 0); + cpu68k_map_set(m68k_read8_map, target, target + 0x80000 - 1, Pico.rom + base, 0); + cpu68k_map_set(m68k_read16_map, target, target + 0x80000 - 1, Pico.rom + base, 0); +} + +void carthw_ssf2_write16(u32 a, u32 d) +{ + PicoWrite16_io(a, d); + if ((a & ~0x0f) == 0xa130f0) + carthw_ssf2_write8(a + 1, d); } static void carthw_ssf2_mem_setup(void) { - cpu68k_map_set(m68k_write8_map, 0xa10000, 0xa1ffff, carthw_ssf2_write8, 1); + cpu68k_map_set(m68k_write8_map, 0xa10000, 0xa1ffff, carthw_ssf2_write8, 1); + cpu68k_map_set(m68k_write16_map, 0xa10000, 0xa1ffff, carthw_ssf2_write16, 1); } static void carthw_ssf2_statef(void) { - int i; - for (i = 1; i < 8; i++) - carthw_ssf2_write8(0xa130f0 | (i << 1), ssf2_banks[i]); + int i, reg; + for (i = 1; i < 8; i++) { + reg = carthw_ssf2_banks[i]; + carthw_ssf2_banks[i] = i; + carthw_ssf2_write8(0xa130f1 | (i << 1), reg); + } +} + +static void carthw_ssf2_unload(void) +{ + memset(carthw_ssf2_banks, 0, sizeof(carthw_ssf2_banks)); + carthw_ssf2_active = 0; } void carthw_ssf2_startup(void) { - int i; + int i; - elprintf(EL_STATUS, "SSF2 mapper startup"); + elprintf(EL_STATUS, "SSF2 mapper startup"); - // default map - for (i = 0; i < 8; i++) - ssf2_banks[i] = i; + // default map + for (i = 0; i < 8; i++) + carthw_ssf2_banks[i] = i; - PicoCartMemSetup = carthw_ssf2_mem_setup; - PicoLoadStateHook = carthw_ssf2_statef; - carthw_chunks = carthw_ssf2_state; + PicoCartMemSetup = carthw_ssf2_mem_setup; + PicoLoadStateHook = carthw_ssf2_statef; + PicoCartUnloadHook = carthw_ssf2_unload; + carthw_chunks = carthw_ssf2_state; + carthw_ssf2_active = 1; } @@ -102,7 +134,7 @@ static carthw_state_chunk carthw_Xin1_state[] = { 0, 0, NULL } }; -// 
TODO: test a0, reads, w16 +// TODO: reads should also work, but then we need to handle open bus static void carthw_Xin1_write8(u32 a, u32 d) { if ((a & 0xffff00) != 0xa13000) { @@ -110,12 +142,23 @@ static void carthw_Xin1_write8(u32 a, u32 d) return; } - carthw_Xin1_do(a, 0x3f, 16); + carthw_Xin1_do(a, 0x3e, 16); +} + +static void carthw_Xin1_write16(u32 a, u32 d) +{ + if ((a & 0xffff00) != 0xa13000) { + PicoWrite16_io(a, d); + return; + } + + carthw_Xin1_write8(a + 1, d); } static void carthw_Xin1_mem_setup(void) { - cpu68k_map_set(m68k_write8_map, 0xa10000, 0xa1ffff, carthw_Xin1_write8, 1); + cpu68k_map_set(m68k_write8_map, 0xa10000, 0xa1ffff, carthw_Xin1_write8, 1); + cpu68k_map_set(m68k_write16_map, 0xa10000, 0xa1ffff, carthw_Xin1_write16, 1); } static void carthw_Xin1_reset(void) @@ -270,6 +313,7 @@ static carthw_state_chunk carthw_pier_state[] = { { CHUNK_CARTHW, sizeof(pier_regs), pier_regs }, { CHUNK_CARTHW + 1, sizeof(pier_dump_prot), &pier_dump_prot }, + { CHUNK_CARTHW + 2, 0, NULL }, // filled later { 0, 0, NULL } }; @@ -308,7 +352,8 @@ static void carthw_pier_write8(u32 a, u32 d) base = d << 19; goto do_map; case 0x09: - // TODO + Pico.sv.changed = 1; + eeprom_spi_write(d); break; case 0x0b: // eeprom read @@ -321,10 +366,9 @@ static void carthw_pier_write8(u32 a, u32 d) return; do_map: - if (base + 0x80000 > Pico.romsize) { - elprintf(EL_ANOMALY|EL_STATUS, "pier: missing bank @ %06x", base); + if (!have_bank(base)) return; - } + cpu68k_map_set(m68k_read8_map, target, target + 0x80000 - 1, Pico.rom + base, 0); cpu68k_map_set(m68k_read16_map, target, target + 0x80000 - 1, Pico.rom + base, 0); } @@ -346,7 +390,7 @@ static u32 carthw_pier_read8(u32 a) return PicoRead8_io(a); if (a == 0xa1300b) - return 0; // TODO + return eeprom_spi_read(a); elprintf(EL_UIO, "pier r8 [%06x] @%06x", a, SekPc); return 0; @@ -366,7 +410,7 @@ static u32 carthw_pier_prot_read8(u32 a) } elprintf(EL_UIO, "pier r8 [%06x] @%06x", a, SekPc); - return Pico.rom[(a & 0x7fff) ^ 1]; + 
return Pico.rom[MEM_BE2(a & 0x7fff)]; } static void carthw_pier_mem_setup(void) @@ -413,12 +457,14 @@ static void carthw_pier_reset(void) { pier_regs[0] = 1; pier_regs[1] = pier_regs[2] = pier_regs[3] = 0; - pier_dump_prot = 3; carthw_pier_statef(); + eeprom_spi_init(NULL); } void carthw_pier_startup(void) { + void *eeprom_state; + int eeprom_size = 0; int i; elprintf(EL_STATUS, "Pier Solar mapper startup"); @@ -430,26 +476,282 @@ void carthw_pier_startup(void) return; } + pier_dump_prot = 3; + // create dump protection bank for (i = 0; i < M68K_BANK_SIZE; i += 0x8000) memcpy(Pico.rom + Pico.romsize + i, Pico.rom, 0x8000); + // save EEPROM + eeprom_state = eeprom_spi_init(&eeprom_size); + Pico.sv.flags = 0; + Pico.sv.size = 0x10000; + Pico.sv.data = calloc(1, Pico.sv.size); + if (!Pico.sv.data) + Pico.sv.size = 0; + carthw_pier_state[2].ptr = eeprom_state; + carthw_pier_state[2].size = eeprom_size; + PicoCartMemSetup = carthw_pier_mem_setup; PicoResetHook = carthw_pier_reset; PicoLoadStateHook = carthw_pier_statef; carthw_chunks = carthw_pier_state; } +/* superfighter mappers, see mame: mame/src/devices/bus/megadrive/rom.cpp */ +unsigned int carthw_sf00x_reg; + +static carthw_state_chunk carthw_sf00x_state[] = +{ + { CHUNK_CARTHW, sizeof(carthw_sf00x_reg), &carthw_sf00x_reg }, + { 0, 0, NULL } +}; + +// SF-001 + +// additionally map SRAM at 0x3c0000 for the newer version of sf001 +static u32 carthw_sf001_read8_sram(u32 a) +{ + return m68k_read8((a & 0xffff) + Pico.sv.start); +} + +static u32 carthw_sf001_read16_sram(u32 a) +{ + return m68k_read16((a & 0xffff) + Pico.sv.start); +} + +static void carthw_sf001_write8_sram(u32 a, u32 d) +{ + m68k_write8((a & 0xffff) + Pico.sv.start, d); +} + +static void carthw_sf001_write16_sram(u32 a, u32 d) +{ + m68k_write16((a & 0xffff) + Pico.sv.start, d); +} + +static void carthw_sf001_write8(u32 a, u32 d) +{ + if ((a & 0xf00) != 0xe00 || (carthw_sf00x_reg & 0x20)) // wrong addr / locked + return; + + if (d & 0x80) { + // bank 
0xe at addr 0x000000 + cpu68k_map_set(m68k_read8_map, 0x000000, 0x040000-1, Pico.rom+0x380000, 0); + cpu68k_map_set(m68k_read16_map, 0x000000, 0x040000-1, Pico.rom+0x380000, 0); + // SRAM also at 0x3c0000 for newer mapper version + cpu68k_map_set(m68k_read8_map, 0x3c0000, 0x400000-1, carthw_sf001_read8_sram, 1); + cpu68k_map_set(m68k_read16_map, 0x3c0000, 0x400000-1, carthw_sf001_read16_sram, 1); + cpu68k_map_set(m68k_write8_map, 0x3c0000, 0x400000-1, carthw_sf001_write8_sram, 1); + cpu68k_map_set(m68k_write16_map,0x3c0000, 0x400000-1, carthw_sf001_write16_sram, 1); + } else { + // bank 0x0 at addr 0x000000 + cpu68k_map_set(m68k_read8_map, 0x000000, 0x040000-1, Pico.rom, 0); + cpu68k_map_set(m68k_read16_map, 0x000000, 0x040000-1, Pico.rom, 0); + // SRAM off, bank 0xf at addr 0x3c0000 + cpu68k_map_set(m68k_read8_map, 0x3c0000, 0x400000-1, Pico.rom+0x3c0000, 0); + cpu68k_map_set(m68k_read16_map, 0x3c0000, 0x400000-1, Pico.rom+0x3c0000, 0); + cpu68k_map_set(m68k_write8_map, 0x3c0000, 0x400000-1, Pico.rom+0x3c0000, 0); + cpu68k_map_set(m68k_write16_map,0x3c0000, 0x400000-1, Pico.rom+0x3c0000, 0); + } + carthw_sf00x_reg = d; +} + +static void carthw_sf001_write16(u32 a, u32 d) +{ + carthw_sf001_write8(a + 1, d); +} + +static void carthw_sf001_mem_setup(void) +{ + // writing to low cartridge addresses + cpu68k_map_set(m68k_write8_map, 0x000000, 0x00ffff, carthw_sf001_write8, 1); + cpu68k_map_set(m68k_write16_map, 0x000000, 0x00ffff, carthw_sf001_write16, 1); +} + +static void carthw_sf001_reset(void) +{ + carthw_sf00x_reg = 0; + carthw_sf001_write8(0x0e01, 0); +} + +static void carthw_sf001_statef(void) +{ + int reg = carthw_sf00x_reg; + carthw_sf00x_reg = 0; + carthw_sf001_write8(0x0e01, reg); +} + +void carthw_sf001_startup(void) +{ + PicoCartMemSetup = carthw_sf001_mem_setup; + PicoResetHook = carthw_sf001_reset; + PicoLoadStateHook = carthw_sf001_statef; + carthw_chunks = carthw_sf00x_state; +} + +// SF-002 + +static void carthw_sf002_write8(u32 a, u32 d) +{ + if ((a 
& 0xf00) != 0xe00) + return; + + if (d & 0x80) { + // bank 0x00-0x0e on addr 0x20000 + cpu68k_map_set(m68k_read8_map, 0x200000, 0x3c0000-1, Pico.rom, 0); + cpu68k_map_set(m68k_read16_map, 0x200000, 0x3c0000-1, Pico.rom, 0); + } else { + // bank 0x10-0x1e on addr 0x20000 + cpu68k_map_set(m68k_read8_map, 0x200000, 0x3c0000-1, Pico.rom+0x200000, 0); + cpu68k_map_set(m68k_read16_map, 0x200000, 0x3c0000-1, Pico.rom+0x200000, 0); + } + carthw_sf00x_reg = d; +} + +static void carthw_sf002_write16(u32 a, u32 d) +{ + carthw_sf002_write8(a + 1, d); +} + +static void carthw_sf002_mem_setup(void) +{ + // writing to low cartridge addresses + cpu68k_map_set(m68k_write8_map, 0x000000, 0x00ffff, carthw_sf002_write8, 1); + cpu68k_map_set(m68k_write16_map, 0x000000, 0x00ffff, carthw_sf002_write16, 1); +} + +static void carthw_sf002_reset(void) +{ + carthw_sf002_write8(0x0e01, 0); +} + +static void carthw_sf002_statef(void) +{ + carthw_sf002_write8(0x0e01, carthw_sf00x_reg); +} + +void carthw_sf002_startup(void) +{ + PicoCartMemSetup = carthw_sf002_mem_setup; + PicoResetHook = carthw_sf002_reset; + PicoLoadStateHook = carthw_sf002_statef; + carthw_chunks = carthw_sf00x_state; +} + +// SF-004 + +// reading from cartridge I/O region returns the current bank index +static u32 carthw_sf004_read8(u32 a) +{ + if ((a & ~0xff) == 0xa13000) + return carthw_sf00x_reg & 0xf0; // bank index + return PicoRead8_io(a); +} + +static u32 carthw_sf004_read16(u32 a) +{ + if ((a & ~0xff) == 0xa13000) + return carthw_sf00x_reg & 0xf0; + return PicoRead16_io(a); +} + +// writing to low cartridge adresses changes mappings +static void carthw_sf004_write8(u32 a, u32 d) +{ + int idx, i; + unsigned bs = 0x40000; // bank size + + // there are 3 byte-sized regs, stored together in carthw_sf00x_reg + if (!(carthw_sf00x_reg & 0x8000)) + return; // locked + + switch (a & 0xf00) { + case 0xd00: + carthw_sf00x_reg = (carthw_sf00x_reg & ~0xff0000) | ((d & 0xff) << 16); + return PicoWrite8_io(0xa130f1, (d & 0x80) ? 
SRR_MAPPED : 0); // SRAM mapping + case 0xe00: + carthw_sf00x_reg = (carthw_sf00x_reg & ~0x00ff00) | ((d & 0xff) << 8); + break; + case 0xf00: + carthw_sf00x_reg = (carthw_sf00x_reg & ~0x0000ff) | ((d & 0xff) << 0); + break; + default: + return; // wrong addr + } + + // bank mapping changed + idx = ((carthw_sf00x_reg>>4) & 0x7); // bank index + if ((carthw_sf00x_reg>>8) & 0x40) { + // linear bank mapping, starting at idx + for (i = 0; i < 8; i++, idx = (idx+1) & 0x7) { + cpu68k_map_set(m68k_read8_map, i*bs, (i+1)*bs-1, Pico.rom + idx*bs, 0); + cpu68k_map_set(m68k_read16_map, i*bs, (i+1)*bs-1, Pico.rom + idx*bs, 0); + } + } else { + // single bank mapping + for (i = 0; i < 8; i++) { + cpu68k_map_set(m68k_read8_map, i*bs, (i+1)*bs-1, Pico.rom + idx*bs, 0); + cpu68k_map_set(m68k_read16_map, i*bs, (i+1)*bs-1, Pico.rom + idx*bs, 0); + } + } +} + +static void carthw_sf004_write16(u32 a, u32 d) +{ + carthw_sf004_write8(a + 1, d); +} + +static void carthw_sf004_mem_setup(void) +{ + // writing to low cartridge addresses + cpu68k_map_set(m68k_write8_map, 0x000000, 0x00ffff, carthw_sf004_write8, 1); + cpu68k_map_set(m68k_write16_map, 0x000000, 0x00ffff, carthw_sf004_write16, 1); + // reading from the cartridge I/O region + cpu68k_map_set(m68k_read8_map, 0xa10000, 0xa1ffff, carthw_sf004_read8, 1); + cpu68k_map_set(m68k_read16_map, 0xa10000, 0xa1ffff, carthw_sf004_read16, 1); +} + +static void carthw_sf004_reset(void) +{ + carthw_sf00x_reg = -1; + carthw_sf004_write8(0x0d01, 0); + carthw_sf004_write8(0x0f01, 0); + carthw_sf004_write8(0x0e01, 0x80); +} + +static void carthw_sf004_statef(void) +{ + int reg = carthw_sf00x_reg; + carthw_sf00x_reg = -1; + carthw_sf004_write8(0x0d01, reg >> 16); + carthw_sf004_write8(0x0f01, reg >> 0); + carthw_sf004_write8(0x0e01, reg >> 8); +} + +void carthw_sf004_startup(void) +{ + PicoCartMemSetup = carthw_sf004_mem_setup; + PicoResetHook = carthw_sf004_reset; + PicoLoadStateHook = carthw_sf004_statef; + carthw_chunks = carthw_sf00x_state; +} + 
/* Simple unlicensed ROM protection emulation */ static struct { u32 addr; u32 mask; u16 val; u16 readonly; -} *sprot_items; -static int sprot_item_alloc; +} sprot_items[8]; static int sprot_item_count; +static carthw_state_chunk carthw_sprot_state[] = +{ + { CHUNK_CARTHW, sizeof(sprot_items), &sprot_items }, + { 0, 0, NULL } +}; + static u16 *carthw_sprot_get_val(u32 a, int rw_only) { int i; @@ -467,9 +769,6 @@ static u32 PicoRead8_sprot(u32 a) u16 *val; u32 d; - if (0xa10000 <= a && a < 0xa12000) - return PicoRead8_io(a); - val = carthw_sprot_get_val(a, 0); if (val != NULL) { d = *val; @@ -478,39 +777,33 @@ static u32 PicoRead8_sprot(u32 a) elprintf(EL_UIO, "prot r8 [%06x] %02x @%06x", a, d, SekPc); return d; } - else { - elprintf(EL_UIO, "prot r8 [%06x] MISS @%06x", a, SekPc); - return 0; - } + else if (0xa10000 <= a && a <= 0xa1ffff) + return PicoRead8_io(a); + + elprintf(EL_UIO, "prot r8 [%06x] MISS @%06x", a, SekPc); + return 0; } static u32 PicoRead16_sprot(u32 a) { u16 *val; - if (0xa10000 <= a && a < 0xa12000) - return PicoRead16_io(a); - val = carthw_sprot_get_val(a, 0); if (val != NULL) { elprintf(EL_UIO, "prot r16 [%06x] %04x @%06x", a, *val, SekPc); return *val; } - else { - elprintf(EL_UIO, "prot r16 [%06x] MISS @%06x", a, SekPc); - return 0; - } + else if (0xa10000 <= a && a <= 0xa1ffff) + return PicoRead16_io(a); + + elprintf(EL_UIO, "prot r16 [%06x] MISS @%06x", a, SekPc); + return 0; } static void PicoWrite8_sprot(u32 a, u32 d) { u16 *val; - if (0xa10000 <= a && a < 0xa12000) { - PicoWrite8_io(a, d); - return; - } - val = carthw_sprot_get_val(a, 1); if (val != NULL) { if (a & 1) @@ -519,45 +812,33 @@ static void PicoWrite8_sprot(u32 a, u32 d) *val = (*val & 0x00ff) | (d << 8); elprintf(EL_UIO, "prot w8 [%06x] %02x @%06x", a, d & 0xff, SekPc); } - else - elprintf(EL_UIO, "prot w8 [%06x] %02x MISS @%06x", a, d & 0xff, SekPc); + else if (0xa10000 <= a && a <= 0xa1ffff) + return PicoWrite8_io(a, d); + + elprintf(EL_UIO, "prot w8 [%06x] %02x MISS 
@%06x", a, d & 0xff, SekPc); } static void PicoWrite16_sprot(u32 a, u32 d) { u16 *val; - if (0xa10000 <= a && a < 0xa12000) { - PicoWrite16_io(a, d); - return; - } - val = carthw_sprot_get_val(a, 1); if (val != NULL) { *val = d; elprintf(EL_UIO, "prot w16 [%06x] %04x @%06x", a, d & 0xffff, SekPc); } - else - elprintf(EL_UIO, "prot w16 [%06x] %04x MISS @%06x", a, d & 0xffff, SekPc); + else if (0xa10000 <= a && a <= 0xa1ffff) + return PicoWrite16_io(a, d); + + elprintf(EL_UIO, "prot w16 [%06x] %04x MISS @%06x", a, d & 0xffff, SekPc); } void carthw_sprot_new_location(unsigned int a, unsigned int mask, unsigned short val, int is_ro) { - if (sprot_items == NULL) { - sprot_items = calloc(8, sizeof(sprot_items[0])); - sprot_item_alloc = 8; - sprot_item_count = 0; - } - - if (sprot_item_count == sprot_item_alloc) { - void *tmp; - sprot_item_alloc *= 2; - tmp = realloc(sprot_items, sprot_item_alloc); - if (tmp == NULL) { - elprintf(EL_STATUS, "OOM"); - return; - } - sprot_items = tmp; + int sprot_elems = sizeof(sprot_items)/sizeof(sprot_items[0]); + if (sprot_item_count == sprot_elems) { + elprintf(EL_STATUS, "too many sprot items"); + return; } sprot_items[sprot_item_count].addr = a; @@ -569,17 +850,17 @@ void carthw_sprot_new_location(unsigned int a, unsigned int mask, unsigned short static void carthw_sprot_unload(void) { - free(sprot_items); - sprot_items = NULL; - sprot_item_count = sprot_item_alloc = 0; + sprot_item_count = 0; } static void carthw_sprot_mem_setup(void) { int start; - // map ROM - 0x7fffff, /TIME areas (which are tipically used) + // map 0x400000 - 0x7fffff, /TIME areas (which are tipically used) start = (Pico.romsize + M68K_BANK_MASK) & ~M68K_BANK_MASK; + if (start < 0x400000) start = 0x400000; + cpu68k_map_set(m68k_read8_map, start, 0x7fffff, PicoRead8_sprot, 1); cpu68k_map_set(m68k_read16_map, start, 0x7fffff, PicoRead16_sprot, 1); cpu68k_map_set(m68k_write8_map, start, 0x7fffff, PicoWrite8_sprot, 1); @@ -597,32 +878,41 @@ void 
carthw_sprot_startup(void) PicoCartMemSetup = carthw_sprot_mem_setup; PicoCartUnloadHook = carthw_sprot_unload; + carthw_chunks = carthw_sprot_state; } /* Protection emulation for Lion King 3. Credits go to Haze */ -static u8 prot_lk3_cmd, prot_lk3_data; +static struct { + u32 bank; + u8 cmd, data; +} carthw_lk3_regs; + +static carthw_state_chunk carthw_lk3_state[] = +{ + { CHUNK_CARTHW, sizeof(carthw_lk3_regs), &carthw_lk3_regs }, + { 0, 0, NULL } +}; + +static u8 *carthw_lk3_mem; // shadow copy memory +static u32 carthw_lk3_madr[0x100000/M68K_BANK_SIZE]; static u32 PicoRead8_plk3(u32 a) { u32 d = 0; - switch (prot_lk3_cmd) { - case 1: d = prot_lk3_data >> 1; break; + switch (carthw_lk3_regs.cmd) { + case 0: d = carthw_lk3_regs.data << 1; break; + case 1: d = carthw_lk3_regs.data >> 1; break; case 2: // nibble rotate - d = ((prot_lk3_data >> 4) | (prot_lk3_data << 4)) & 0xff; + d = ((carthw_lk3_regs.data >> 4) | (carthw_lk3_regs.data << 4)) & 0xff; break; case 3: // bit rotate - d = prot_lk3_data; + d = carthw_lk3_regs.data; d = (d >> 4) | (d << 4); d = ((d & 0xcc) >> 2) | ((d & 0x33) << 2); d = ((d & 0xaa) >> 1) | ((d & 0x55) << 1); break; -/* Top Fighter 2000 MK VIII (Unl) - case 0x98: d = 0x50; break; // prot_lk3_data == a8 here - case 0x67: d = 0xde; break; // prot_lk3_data == 7b here (rot!) 
- case 0xb5: d = 0x9f; break; // prot_lk3_data == 4a -*/ default: - elprintf(EL_UIO, "unhandled prot cmd %02x @%06x", prot_lk3_cmd, SekPc); + elprintf(EL_UIO, "unhandled prot cmd %02x @%06x", carthw_lk3_regs.cmd, SekPc); break; } @@ -634,48 +924,207 @@ static void PicoWrite8_plk3p(u32 a, u32 d) { elprintf(EL_UIO, "prot w8 [%06x] %02x @%06x", a, d & 0xff, SekPc); if (a & 2) - prot_lk3_cmd = d; + carthw_lk3_regs.cmd = d & 0x3; else - prot_lk3_data = d; + carthw_lk3_regs.data = d; } static void PicoWrite8_plk3b(u32 a, u32 d) { - int addr; + u32 addr; elprintf(EL_UIO, "prot w8 [%06x] %02x @%06x", a, d & 0xff, SekPc); addr = d << 15; - if (addr + 0x8000 > Pico.romsize) { - elprintf(EL_UIO|EL_ANOMALY, "prot_lk3: bank too large: %02x", d); + if (addr+0x10000 >= Pico.romsize) { + elprintf(EL_UIO|EL_ANOMALY, "lk3_mapper: bank too large: %02x", d); return; } - if (addr == 0) - memcpy(Pico.rom, Pico.rom + Pico.romsize, 0x8000); - else - memcpy(Pico.rom, Pico.rom + addr, 0x8000); + + if (addr != carthw_lk3_regs.bank) { + // banking is by or'ing the bank address in the 1st megabyte, not adding. + // only do linear mapping if map addresses aren't overlapping bank address + u32 len = M68K_BANK_SIZE; + u32 a, b; + for (b = 0x000000; b < 0x0100000; b += len) { + if (!((b + (len-1)) & addr)) { + cpu68k_map_set(m68k_read8_map, b, b + (len-1), Pico.rom+addr + b, 0); + cpu68k_map_set(m68k_read16_map, b, b + (len-1), Pico.rom+addr + b, 0); + } else { + // overlap. 
ugh, need a shadow copy since banks can contain code and + // 68K cpu emulator cores need mapped access to code memory + if (carthw_lk3_madr[b/len] != addr) // only if shadow isn't the same + for (a = b; a < b+M68K_BANK_SIZE; a += 0x8000) + memcpy(carthw_lk3_mem + a, Pico.rom + (addr|a), 0x8000); + carthw_lk3_madr[b/len] = addr; + cpu68k_map_set(m68k_read8_map, b, b + (len-1), carthw_lk3_mem + b, 0); + cpu68k_map_set(m68k_read16_map, b, b + (len-1), carthw_lk3_mem + b, 0); + } + } + } + carthw_lk3_regs.bank = addr; } -static void carthw_prot_lk3_mem_setup(void) +static void carthw_lk3_mem_setup(void) { cpu68k_map_set(m68k_read8_map, 0x600000, 0x7fffff, PicoRead8_plk3, 1); cpu68k_map_set(m68k_write8_map, 0x600000, 0x6fffff, PicoWrite8_plk3p, 1); cpu68k_map_set(m68k_write8_map, 0x700000, 0x7fffff, PicoWrite8_plk3b, 1); + carthw_lk3_regs.bank = 0; } -void carthw_prot_lk3_startup(void) +static void carthw_lk3_statef(void) { - int ret; + PicoWrite8_plk3b(0x700000, carthw_lk3_regs.bank >> 15); +} +static void carthw_lk3_unload(void) +{ + free(carthw_lk3_mem); + carthw_lk3_mem = NULL; + memset(carthw_lk3_madr, 0, sizeof(carthw_lk3_madr)); +} + +void carthw_lk3_startup(void) +{ elprintf(EL_STATUS, "lk3 prot emu startup"); - // allocate space for bank0 backup - ret = PicoCartResize(Pico.romsize + 0x8000); - if (ret != 0) { + // allocate space for shadow copy + if (carthw_lk3_mem == NULL) + carthw_lk3_mem = malloc(0x100000); + if (carthw_lk3_mem == NULL) { elprintf(EL_STATUS, "OOM"); return; } - memcpy(Pico.rom + Pico.romsize, Pico.rom, 0x8000); - PicoCartMemSetup = carthw_prot_lk3_mem_setup; + PicoCartMemSetup = carthw_lk3_mem_setup; + PicoLoadStateHook = carthw_lk3_statef; + PicoCartUnloadHook = carthw_lk3_unload; + carthw_chunks = carthw_lk3_state; } +/* SMW64 mapper, based on mame source */ +static struct { + u32 bank60, bank61; + u16 data[8], ctrl[4]; +} carthw_smw64_regs; + +static carthw_state_chunk carthw_smw64_state[] = +{ + { CHUNK_CARTHW, 
sizeof(carthw_smw64_regs), &carthw_smw64_regs }, + { 0, 0, NULL } +}; + +static u32 PicoRead8_smw64(u32 a) +{ + u16 *data = carthw_smw64_regs.data, *ctrl = carthw_smw64_regs.ctrl; + u32 d = 0; + + if (a & 1) { + if (a>>16 == 0x66) switch ((a>>1) & 7) { + case 0: d = carthw_smw64_regs.data[0] ; break; + case 1: d = carthw_smw64_regs.data[0]+1; break; + case 2: d = carthw_smw64_regs.data[1] ; break; + case 3: d = carthw_smw64_regs.data[1]+1; break; + case 4: d = carthw_smw64_regs.data[2] ; break; + case 5: d = carthw_smw64_regs.data[2]+1; break; + case 6: d = carthw_smw64_regs.data[2]+2; break; + case 7: d = carthw_smw64_regs.data[2]+3; break; + } else /*0x67*/ { // :-O + if (ctrl[1] & 0x80) + d = ctrl[2] & 0x40 ? data[4]&data[5] : data[4]^0xff; + if (a & 2) + d &= 0x7f; + else if (ctrl[2] & 0x80) { + if (ctrl[2] & 0x20) + data[2] = (data[5] << 2) & 0xfc; + else + data[0] = ((data[4] << 1) ^ data[3]) & 0xfe; + } + } + } + + elprintf(EL_UIO, "prot r8 [%06x] %02x @%06x", a, d, SekPc); + return d; +} + +static u32 PicoRead16_smw64(u32 a) +{ + return PicoRead8_smw64(a+1); +} + +static void PicoWrite8_smw64(u32 a, u32 d) +{ + u16 *data = carthw_smw64_regs.data, *ctrl = carthw_smw64_regs.ctrl; + + if ((a & 3) == 1) { + switch (a >> 16) { + case 0x60: ctrl[0] = d; break; + case 0x64: data[4] = d; break; + case 0x67: + if (ctrl[1] & 0x80) { + carthw_smw64_regs.bank60 = 0x80000 + ((d<<14) & 0x70000); + cpu68k_map_set(m68k_read8_map, 0x600000, 0x60ffff, Pico.rom + carthw_smw64_regs.bank60, 0); + cpu68k_map_set(m68k_read16_map, 0x600000, 0x60ffff, Pico.rom + carthw_smw64_regs.bank60, 0); + } + ctrl[2] = d; + } + } else if ((a & 3) == 3) { + switch (a >> 16) { + case 0x61: ctrl[1] = d; break; + case 0x64: data[5] = d; break; + case 0x60: + switch (ctrl[0] & 7) { // :-O + case 0: data[0] = (data[0]^data[3] ^ d) & 0xfe; break; + case 1: data[1] = ( d) & 0xfe; break; + case 7: + carthw_smw64_regs.bank61 = 0x80000 + ((d<<14) & 0x70000); + cpu68k_map_set(m68k_read8_map, 0x610000, 
0x61ffff, Pico.rom + carthw_smw64_regs.bank61, 0); + cpu68k_map_set(m68k_read16_map, 0x610000, 0x61ffff, Pico.rom + carthw_smw64_regs.bank61, 0); + break; + } + data[3] = d; + } + } +} + +static void PicoWrite16_smw64(u32 a, u32 d) +{ + PicoWrite8_smw64(a+1, d); +} + +static void carthw_smw64_mem_setup(void) +{ + // 1st 512 KB mirrored + cpu68k_map_set(m68k_read8_map, 0x080000, 0x0fffff, Pico.rom, 0); + cpu68k_map_set(m68k_read16_map, 0x080000, 0x0fffff, Pico.rom, 0); + + cpu68k_map_set(m68k_read8_map, 0x660000, 0x67ffff, PicoRead8_smw64, 1); + cpu68k_map_set(m68k_read16_map, 0x660000, 0x67ffff, PicoRead16_smw64, 1); + cpu68k_map_set(m68k_write8_map, 0x600000, 0x67ffff, PicoWrite8_smw64, 1); + cpu68k_map_set(m68k_write16_map, 0x600000, 0x67ffff, PicoWrite16_smw64, 1); +} + +static void carthw_smw64_statef(void) +{ + cpu68k_map_set(m68k_read8_map, 0x600000, 0x60ffff, Pico.rom + carthw_smw64_regs.bank60, 0); + cpu68k_map_set(m68k_read16_map, 0x600000, 0x60ffff, Pico.rom + carthw_smw64_regs.bank60, 0); + cpu68k_map_set(m68k_read8_map, 0x610000, 0x61ffff, Pico.rom + carthw_smw64_regs.bank61, 0); + cpu68k_map_set(m68k_read16_map, 0x610000, 0x61ffff, Pico.rom + carthw_smw64_regs.bank61, 0); +} + +static void carthw_smw64_reset(void) +{ + memset(&carthw_smw64_regs, 0, sizeof(carthw_smw64_regs)); +} + +void carthw_smw64_startup(void) +{ + elprintf(EL_STATUS, "SMW64 mapper startup"); + + PicoCartMemSetup = carthw_smw64_mem_setup; + PicoResetHook = carthw_smw64_reset; + PicoLoadStateHook = carthw_smw64_statef; + carthw_chunks = carthw_smw64_state; +} + +// vim:ts=2:sw=2:expandtab diff --git a/pico/carthw/carthw.h b/pico/carthw/carthw.h index 869a5cd6..bcf91da3 100644 --- a/pico/carthw/carthw.h +++ b/pico/carthw/carthw.h @@ -1,5 +1,6 @@ /* svp */ +#include "../pico_types.h" #include "svp/ssp16.h" typedef struct { @@ -14,15 +15,24 @@ void PicoSVPInit(void); void PicoSVPStartup(void); void PicoSVPMemSetup(void); -/* misc */ +/* standard/ssf2 mapper */ +extern int 
carthw_ssf2_active; +extern unsigned char carthw_ssf2_banks[8]; void carthw_ssf2_startup(void); +void carthw_ssf2_write8(u32 a, u32 d); + +/* misc */ void carthw_Xin1_startup(void); void carthw_realtec_startup(void); void carthw_radica_startup(void); void carthw_pier_startup(void); +void carthw_sf001_startup(void); +void carthw_sf002_startup(void); +void carthw_sf004_startup(void); void carthw_sprot_startup(void); void carthw_sprot_new_location(unsigned int a, unsigned int mask, unsigned short val, int is_ro); -void carthw_prot_lk3_startup(void); +void carthw_lk3_startup(void); +void carthw_smw64_startup(void); diff --git a/pico/carthw/eeprom_spi.c b/pico/carthw/eeprom_spi.c new file mode 100644 index 00000000..b250afff --- /dev/null +++ b/pico/carthw/eeprom_spi.c @@ -0,0 +1,360 @@ +/**************************************************************************** + * Genesis Plus + * SPI Serial EEPROM (25xxx/95xxx) support + * + * Copyright (C) 2012 Eke-Eke (Genesis Plus GX) + * + * Redistribution and use of this code or any derivative works are permitted + * provided that the following conditions are met: + * + * - Redistributions may not be sold, nor may they be used in a commercial + * product or activity. + * + * - Redistributions that are modified from the original source must include the + * complete source code, including the source code for all components used by a + * binary built from the modified sources. However, as a special exception, the + * source code distributed need not include anything that is normally distributed + * (in either source or binary form) with the major components (compiler, kernel, + * and so on) of the operating system on which the executable runs, unless that + * component itself accompanies the executable. + * + * - Redistributions must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************************/ + +#include "../pico_int.h" +#include "../cd/genplus_macros.h" +#include "eeprom_spi.h" + +/* max supported size 64KB (25x512/95x512) */ +#define SIZE_MASK 0xffff +#define PAGE_MASK 0x7f + +/* hard-coded board implementation (!WP pin not used) */ +#define BIT_DATA (0) +#define BIT_CLK (1) +#define BIT_HOLD (2) +#define BIT_CS (3) + +typedef enum +{ + STANDBY, + GET_OPCODE, + GET_ADDRESS, + WRITE_BYTE, + READ_BYTE +} T_STATE_SPI; + +typedef struct +{ + uint8 cs; /* !CS line state */ + uint8 clk; /* SCLK line state */ + uint8 out; /* SO line state */ + uint8 status; /* status register */ + uint8 opcode; /* 8-bit opcode */ + uint8 buffer; /* 8-bit data buffer */ + uint16 addr; /* 16-bit address */ + uint32 cycles; /* current operation cycle */ + T_STATE_SPI state; /* current operation state */ +} T_EEPROM_SPI; + +static T_EEPROM_SPI spi_eeprom; + +void *eeprom_spi_init(int *size) +{ + /* reset eeprom state */ + memset(&spi_eeprom, 0, sizeof(T_EEPROM_SPI)); + spi_eeprom.out = 1; + spi_eeprom.state = GET_OPCODE; + + if (size) + *size = sizeof(T_EEPROM_SPI); + return &spi_eeprom; +} + 
+void eeprom_spi_write(unsigned char data) +{ + /* Make sure !HOLD is high */ + if (data & (1 << BIT_HOLD)) + { + /* Check !CS state */ + if (data & (1 << BIT_CS)) + { + /* !CS high -> end of current operation */ + spi_eeprom.cycles = 0; + spi_eeprom.out = 1; + spi_eeprom.opcode = 0; + spi_eeprom.state = GET_OPCODE; + } + else + { + /* !CS low -> process current operation */ + switch (spi_eeprom.state) + { + case GET_OPCODE: + { + /* latch data on CLK positive edge */ + if ((data & (1 << BIT_CLK)) && !spi_eeprom.clk) + { + /* 8-bit opcode buffer */ + spi_eeprom.opcode |= ((data >> BIT_DATA) & 1); + spi_eeprom.cycles++; + + /* last bit ? */ + if (spi_eeprom.cycles == 8) + { + /* reset cycles count */ + spi_eeprom.cycles = 0; + + /* Decode instruction */ + switch (spi_eeprom.opcode) + { + case 0x01: + { + /* WRITE STATUS */ + spi_eeprom.buffer = 0; + spi_eeprom.state = WRITE_BYTE; + break; + } + + case 0x02: + { + /* WRITE BYTE */ + spi_eeprom.addr = 0; + spi_eeprom.state = GET_ADDRESS; + break; + } + + case 0x03: + { + /* READ BYTE */ + spi_eeprom.addr = 0; + spi_eeprom.state = GET_ADDRESS; + break; + } + + case 0x04: + { + /* WRITE DISABLE */ + spi_eeprom.status &= ~0x02; + spi_eeprom.state = STANDBY; + break; + } + + case 0x05: + { + /* READ STATUS */ + spi_eeprom.buffer = spi_eeprom.status; + spi_eeprom.state = READ_BYTE; + break; + } + + case 0x06: + { + /* WRITE ENABLE */ + spi_eeprom.status |= 0x02; + spi_eeprom.state = STANDBY; + break; + } + + default: + { + /* specific instructions (not supported) */ + spi_eeprom.state = STANDBY; + break; + } + } + } + else + { + /* shift opcode value */ + spi_eeprom.opcode = spi_eeprom.opcode << 1; + } + } + break; + } + + case GET_ADDRESS: + { + /* latch data on CLK positive edge */ + if ((data & (1 << BIT_CLK)) && !spi_eeprom.clk) + { + /* 16-bit address */ + spi_eeprom.addr |= ((data >> BIT_DATA) & 1); + spi_eeprom.cycles++; + + /* last bit ? 
*/ + if (spi_eeprom.cycles == 16) + { + /* reset cycles count */ + spi_eeprom.cycles = 0; + + /* mask unused address bits */ + spi_eeprom.addr &= SIZE_MASK; + + /* operation type */ + if (spi_eeprom.opcode & 0x01) + { + /* READ operation */ + spi_eeprom.buffer = Pico.sv.data[spi_eeprom.addr]; + spi_eeprom.state = READ_BYTE; + } + else + { + /* WRITE operation */ + spi_eeprom.buffer = 0; + spi_eeprom.state = WRITE_BYTE; + } + } + else + { + /* shift address value */ + spi_eeprom.addr = spi_eeprom.addr << 1; + } + } + break; + } + + case WRITE_BYTE: + { + /* latch data on CLK positive edge */ + if ((data & (1 << BIT_CLK)) && !spi_eeprom.clk) + { + /* 8-bit data buffer */ + spi_eeprom.buffer |= ((data >> BIT_DATA) & 1); + spi_eeprom.cycles++; + + /* last bit ? */ + if (spi_eeprom.cycles == 8) + { + /* reset cycles count */ + spi_eeprom.cycles = 0; + + /* write data to destination */ + if (spi_eeprom.opcode & 0x01) + { + /* update status register */ + spi_eeprom.status = (spi_eeprom.status & 0x02) | (spi_eeprom.buffer & 0x0c); + + /* wait for operation end */ + spi_eeprom.state = STANDBY; + } + else + { + /* Memory Array (write-protected) */ + if (spi_eeprom.status & 2) + { + /* check array protection bits (BP0, BP1) */ + switch ((spi_eeprom.status >> 2) & 0x03) + { + case 0x01: + { + /* $C000-$FFFF (sector #3) is protected */ + if (spi_eeprom.addr < 0xC000) + { + Pico.sv.data[spi_eeprom.addr] = spi_eeprom.buffer; + } + break; + } + + case 0x02: + { + /* $8000-$FFFF (sectors #2 and #3) is protected */ + if (spi_eeprom.addr < 0x8000) + { + Pico.sv.data[spi_eeprom.addr] = spi_eeprom.buffer; + } + break; + } + + case 0x03: + { + /* $0000-$FFFF (all sectors) is protected */ + break; + } + + default: + { + /* no sectors protected */ + Pico.sv.data[spi_eeprom.addr] = spi_eeprom.buffer; + break; + } + } + } + + /* reset data buffer */ + spi_eeprom.buffer = 0; + + /* increase array address (sequential writes are limited within the same page) */ + spi_eeprom.addr = 
(spi_eeprom.addr & ~PAGE_MASK) | ((spi_eeprom.addr + 1) & PAGE_MASK); + } + } + else + { + /* shift data buffer value */ + spi_eeprom.buffer = spi_eeprom.buffer << 1; + } + } + break; + } + + case READ_BYTE: + { + /* output data on CLK positive edge */ + if ((data & (1 << BIT_CLK)) && !spi_eeprom.clk) + { + /* read out bits */ + spi_eeprom.out = (spi_eeprom.buffer >> (7 - spi_eeprom.cycles)) & 1; + spi_eeprom.cycles++; + + /* last bit ? */ + if (spi_eeprom.cycles == 8) + { + /* reset cycles count */ + spi_eeprom.cycles = 0; + + /* read from memory array ? */ + if (spi_eeprom.opcode == 0x03) + { + /* read next array byte */ + spi_eeprom.addr = (spi_eeprom.addr + 1) & SIZE_MASK; + spi_eeprom.buffer = Pico.sv.data[spi_eeprom.addr]; + } + } + } + break; + } + + default: + { + /* wait for !CS low->high transition */ + break; + } + } + } + } + + /* update input lines */ + spi_eeprom.cs = (data >> BIT_CS) & 1; + spi_eeprom.clk = (data >> BIT_CLK) & 1; +} + +unsigned int eeprom_spi_read(unsigned int address) +{ + return (spi_eeprom.out << BIT_DATA); +} + diff --git a/pico/carthw/eeprom_spi.h b/pico/carthw/eeprom_spi.h new file mode 100644 index 00000000..2d60e0f4 --- /dev/null +++ b/pico/carthw/eeprom_spi.h @@ -0,0 +1,47 @@ +/**************************************************************************** + * Genesis Plus + * SPI Serial EEPROM (25XX512 only) support + * + * Copyright (C) 2012 Eke-Eke (Genesis Plus GX) + * + * Redistribution and use of this code or any derivative works are permitted + * provided that the following conditions are met: + * + * - Redistributions may not be sold, nor may they be used in a commercial + * product or activity. + * + * - Redistributions that are modified from the original source must include the + * complete source code, including the source code for all components used by a + * binary built from the modified sources. 
However, as a special exception, the + * source code distributed need not include anything that is normally distributed + * (in either source or binary form) with the major components (compiler, kernel, + * and so on) of the operating system on which the executable runs, unless that + * component itself accompanies the executable. + * + * - Redistributions must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + ****************************************************************************************/ + +#ifndef _EEPROM_SPI_H_ +#define _EEPROM_SPI_H_ + +/* Function prototypes */ +extern void *eeprom_spi_init(int *size); +extern void eeprom_spi_write(unsigned char data); +extern unsigned int eeprom_spi_read(unsigned int address); + +#endif diff --git a/pico/carthw/svp/compiler.c b/pico/carthw/svp/compiler.c index b31197c2..9cc0f883 100644 --- a/pico/carthw/svp/compiler.c +++ b/pico/carthw/svp/compiler.c @@ -1,13 +1,14 @@ /* * SSP1601 to ARM recompiler * (C) notaz, 2008,2009,2010 + * (C) irixxxx, 2019-2023 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. */ -#include "../../pico_int.h" -#include "../../../cpu/drc/cmn.h" +#include +#include #include "compiler.h" // FIXME: asm has these hardcoded @@ -39,7 +40,7 @@ void ssp_drc_end(void){} #endif #define COUNT_OP -#include "../../../cpu/drc/emit_arm.c" +#include // ----------------------------------------------------- @@ -359,7 +360,7 @@ static void tr_mov16(int r, int val) static void tr_mov16_cond(int cond, int r, int val) { - emith_op_imm(cond, 0, A_OP_MOV, r, val); + emith_move_r_imm_c(cond, r, val); hostreg_r[r] = -1; } @@ -476,6 +477,7 @@ static void tr_ptrr_mod(int r, int mod, int need_modulo, int count) if (mod == 2) known_regs.r[r] = (known_regs.r[r] & ~modulo) | ((known_regs.r[r] - count) & modulo); else known_regs.r[r] = (known_regs.r[r] & ~modulo) | ((known_regs.r[r] + count) & modulo); + dirty_regb |= (1 << (r + 8)); } else { @@ -693,9 +695,9 @@ static int tr_aop_ssp2arm(int op) /* spacial version of call for calling C needed on ios, since we use r9.. 
*/ static void emith_call_c_func(void *target) { - EOP_STMFD_SP(A_R7M|A_R9M); + EOP_STMFD_SP(M2(7,9)); emith_call(target); - EOP_LDMFD_SP(A_R7M|A_R9M); + EOP_LDMFD_SP(M2(7,9)); } #else #define emith_call_c_func emith_call @@ -842,6 +844,7 @@ static void tr_PMX_to_r0(int reg) return; } + tr_flush_dirty_pmcrs(); known_regb &= ~KRREG_PMC; dirty_regb &= ~KRREG_PMC; known_regb &= ~(1 << (20+reg)); @@ -849,7 +852,6 @@ static void tr_PMX_to_r0(int reg) // call the C code to handle this tr_flush_dirty_ST(); - //tr_flush_dirty_pmcrs(); tr_mov16(0, reg); emith_call_c_func(ssp_pm_read); hostreg_clear(); @@ -989,6 +991,7 @@ static void tr_r0_to_ST(int const_val) EOP_ORR_REG_LSL(6, 6, 1, 4); // orr r6, r6, r1, lsl #4 TR_WRITE_R0_TO_REG(SSP_ST); hostreg_r[1] = -1; + known_regb &= ~KRREG_ST; dirty_regb &= ~KRREG_ST; } @@ -1021,9 +1024,9 @@ static void tr_r0_to_AL(int const_val) hostreg_sspreg_changed(SSP_AL); if (const_val != -1) { known_regs.gr[SSP_A].l = const_val; - known_regb |= 1 << SSP_AL; + known_regb |= KRREG_AL; } else - known_regb &= ~(1 << SSP_AL); + known_regb &= ~KRREG_AL; } static void tr_r0_to_PMX(int reg) @@ -1083,6 +1086,7 @@ static void tr_r0_to_PMX(int reg) return; } + tr_flush_dirty_pmcrs(); known_regb &= ~KRREG_PMC; dirty_regb &= ~KRREG_PMC; known_regb &= ~(1 << (25+reg)); @@ -1090,7 +1094,6 @@ static void tr_r0_to_PMX(int reg) // call the C code to handle this tr_flush_dirty_ST(); - //tr_flush_dirty_pmcrs(); tr_mov16(1, reg); emith_call_c_func(ssp_pm_write); hostreg_clear(); @@ -1128,16 +1131,17 @@ static void tr_r0_to_PMC(int const_val) known_regs.emu_status |= SSP_PMC_HAVE_ADDR; known_regs.pmc.l = const_val; } + dirty_regb |= KRREG_PMC; } else { tr_flush_dirty_ST(); - if (known_regb & KRREG_PMC) { + if (dirty_regb & KRREG_PMC) { emith_move_r_imm(1, known_regs.pmc.v); EOP_STR_IMM(1,7,0x400+SSP_PMC*4); - known_regb &= ~KRREG_PMC; dirty_regb &= ~KRREG_PMC; } + known_regb &= ~KRREG_PMC; EOP_LDR_IMM(1,7,0x484); // ldr r1, [r7, #0x484] // emu_status 
EOP_ADD_IMM(2,7,24/2,4); // add r2, r7, #0x400 EOP_TST_IMM(1, 0, SSP_PMC_HAVE_ADDR); @@ -1245,7 +1249,7 @@ static int tr_detect_pm0_block(unsigned int op, int *pc, int imm) EOP_ORR_IMM(6, 6, 24/2, 6); // orr r6, r6, 0x600 hostreg_sspreg_changed(SSP_ST); known_regs.gr[SSP_ST].h = 0x60; - known_regb |= 1 << SSP_ST; + known_regb |= KRREG_ST; dirty_regb &= ~KRREG_ST; (*pc) += 3*2; n_in_ops += 3; @@ -1434,16 +1438,13 @@ static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *j tmpv = tr_cond_check(op); if (tmpv != A_COND_AL) { jump_op = tcache_ptr; - EOP_MOV_IMM(0, 0, 0); // placeholder for branch + EOP_C_B(tmpv, 0, 0); // placeholder for branch } tr_mov16(0, *pc); tr_r0_to_STACK(*pc); - if (tmpv != A_COND_AL) { - u32 *real_ptr = tcache_ptr; - tcache_ptr = jump_op; - EOP_C_B(tr_neg_cond(tmpv),0,real_ptr - jump_op - 2); - tcache_ptr = real_ptr; - } + if (tmpv != A_COND_AL) + EOP_C_B_PTR(jump_op, tr_neg_cond(tmpv), 0, + tcache_ptr - jump_op - 2); tr_mov16_cond(tmpv, 0, imm); if (tmpv != A_COND_AL) tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc); @@ -1517,8 +1518,8 @@ static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *j tr_make_dirty_ST(); EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_SUB,1,5,5,0,A_AM1_LSL,10); // subs r5, r5, r10 hostreg_sspreg_changed(SSP_A); - known_regb &= ~(KRREG_A|KRREG_AL); dirty_regb |= KRREG_ST; + known_regb &= ~(KRREG_A|KRREG_AL|KRREG_ST); ret++; break; // mpya (rj), (ri), b @@ -1528,8 +1529,8 @@ static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *j tr_make_dirty_ST(); EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_ADD,1,5,5,0,A_AM1_LSL,10); // adds r5, r5, r10 hostreg_sspreg_changed(SSP_A); - known_regb &= ~(KRREG_A|KRREG_AL); dirty_regb |= KRREG_ST; + known_regb &= ~(KRREG_A|KRREG_AL|KRREG_ST); ret++; break; // mld (rj), (ri), b @@ -1537,8 +1538,9 @@ static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *j EOP_C_DOP_IMM(A_COND_AL,A_OP_MOV,1,0,5,0,0); // movs r5, #0 
hostreg_sspreg_changed(SSP_A); known_regs.gr[SSP_A].v = 0; - known_regb |= (KRREG_A|KRREG_AL); dirty_regb |= KRREG_ST; + known_regb &= ~KRREG_ST; + known_regb |= (KRREG_A|KRREG_AL); tr_mac_load_XY(op); ret++; break; @@ -1712,12 +1714,8 @@ static void *emit_block_epilogue(int cycles, int cond, int pc, int end_pc) ssp_block_table[pc]; if (target != NULL) emith_jump(target); - else { - int ops = emith_jump(ssp_drc_next); - end_ptr = tcache_ptr; - // cause the next block to be emitted over jump instruction - tcache_ptr -= ops; - } + else + emith_jump(ssp_drc_next); } else { u32 *target1 = (pc < 0x400) ? @@ -1795,17 +1793,18 @@ void *ssp_translate_block(int pc) tr_flush_dirty_ST(); tr_flush_dirty_pmcrs(); block_end = emit_block_epilogue(ccount, end_cond, jump_pc, pc); + emith_flush(); + emith_pool_commit(-1); if (tcache_ptr - (u32 *)tcache > DRC_TCACHE_SIZE/4) { elprintf(EL_ANOMALY|EL_STATUS|EL_SVP, "tcache overflow!\n"); - fflush(stdout); exit(1); } // stats nblocks++; - //printf("%i blocks, %i bytes, k=%.3f\n", nblocks, (tcache_ptr - tcache)*4, - // (double)(tcache_ptr - tcache) / (double)n_in_ops); + //printf("%i blocks, %i bytes, k=%.3f\n", nblocks, (u8 *)tcache_ptr - tcache, + // (double)((u8 *)tcache_ptr - tcache) / (double)n_in_ops); #ifdef DUMP_BLOCK { diff --git a/pico/carthw/svp/memory.c b/pico/carthw/svp/memory.c index b37985e9..6fb3c3ed 100644 --- a/pico/carthw/svp/memory.c +++ b/pico/carthw/svp/memory.c @@ -26,8 +26,8 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -#include "../../pico_int.h" -#include "../../memory.h" +#include +#include // for wait loop det static void PicoWrite16_dram(u32 a, u32 d) @@ -87,7 +87,7 @@ static u32 PicoRead16_svpr(u32 a) a15004_looping = 0; if (!a15004_looping) - elprintf(EL_SVP, "SVP r%i: [%06x] %04x @%06x", realsize, a, d, SekPc); + elprintf(EL_SVP, "SVP r: [%06x] %04x @%06x", a, d, SekPc); if (a == 0xa15004 && !(d&1)) { if (!a15004_looping) diff --git a/pico/carthw/svp/ssp16.c b/pico/carthw/svp/ssp16.c index e8de0013..229449fb 100644 --- a/pico/carthw/svp/ssp16.c +++ b/pico/carthw/svp/ssp16.c @@ -206,7 +206,7 @@ * ops not used by VR are not implemented */ -#include "../../pico_int.h" +#include #define u32 unsigned int @@ -474,6 +474,8 @@ static int get_inc(int mode) static u32 pm_io(int reg, int write, u32 d) { + unsigned int *pmac; + if (ssp->emu_status & SSP_PMC_SET) { // this MUST be blind r or w @@ -484,7 +486,8 @@ static u32 pm_io(int reg, int write, u32 d) return 0; } elprintf(EL_SVP, "PM%i (%c) set to %08x @ %04x", reg, write ? 'w' : 'r', rPMC.v, GET_PPC_OFFS()); - ssp->pmac_read[write ? reg + 6 : reg] = rPMC.v; + pmac = write ? ssp->pmac_write : ssp->pmac_read; + pmac[reg] = rPMC.v; ssp->emu_status &= ~SSP_PMC_SET; if ((rPMC.v & 0x7fffff) == 0x1c8000 || (rPMC.v & 0x7fffff) == 0x1c8240) { elprintf(EL_SVP, "ssp IRAM copy from %06x to %04x", (ssp->RAM1[0]-1)<<1, (rPMC.v&0x7fff)<<1); @@ -573,7 +576,8 @@ static u32 pm_io(int reg, int write, u32 d) } // PMC value corresponds to last PMR accessed (not sure). - rPMC.v = ssp->pmac_read[write ? reg + 6 : reg]; + pmac = write ? 
ssp->pmac_write : ssp->pmac_read; + rPMC.v = pmac[reg]; return d; } diff --git a/pico/carthw/svp/ssp16.h b/pico/carthw/svp/ssp16.h index 318a4a7e..67f4f459 100644 --- a/pico/carthw/svp/ssp16.h +++ b/pico/carthw/svp/ssp16.h @@ -38,8 +38,13 @@ typedef union { unsigned int v; struct { +#if CPU_IS_LE unsigned short l; unsigned short h; +#else + unsigned short h; + unsigned short l; +#endif }; } ssp_reg_t; diff --git a/pico/carthw/svp/stub_arm.S b/pico/carthw/svp/stub_arm.S index 9d5c5fa1..cb1c1cd5 100644 --- a/pico/carthw/svp/stub_arm.S +++ b/pico/carthw/svp/stub_arm.S @@ -6,9 +6,9 @@ @* See COPYING file in the top-level directory. @* -#include "../../arm_features.h" +#include -.syntax unified +@.syntax unified .text .align 2 @@ -281,8 +281,8 @@ ssp_hle_902_loop: bgt ssp_hle_902_loop tst r12, #1 - ldrhne r0, [r2], #2 - strhne r0, [r3], #2 + ldrneh r0, [r2], #2 + strneh r0, [r3], #2 ldr r0, [r7, #SSP_OFFS_IRAM_ROM] add r1, r7, #0x200 @@ -501,7 +501,7 @@ FUNCTION(ssp_hle_07_036): mov r12, #0x4000 orr r12,r12,#0x0018 subs r12,r3, r12 - subsne r12,r12,#0x0400 + subnes r12,r12,#0x0400 blne tr_unhandled orr r2, r2, r2, lsl #16 @@ -510,7 +510,7 @@ FUNCTION(ssp_hle_07_036): hle_07_036_no_ovrwr: tst r1, #2 - strhne r2, [r1], #0x3e @ align + strneh r2, [r1], #0x3e @ align subne r0, r0, #1 subs r0, r0, #4 blt hle_07_036_l2 @@ -525,7 +525,7 @@ hle_07_036_l2: tst r0, #2 strne r2, [r1], #0x40 tst r0, #1 - strhne r2, [r1], #2 + strneh r2, [r1], #2 b hle_07_036_end_copy hle_07_036_ovrwr: @@ -562,10 +562,10 @@ hle_07_036_ol1: hle_07_036_ol2: tst r0, #1 - ldrhne r3, [r1] + ldrneh r3, [r1] andne r3, r3, r12 orrne r3, r3, r2 - strhne r3, [r1], #2 + strneh r3, [r1], #2 hle_07_036_end_copy: ldr r2, [r7, #SSP_OFFS_DRAM] diff --git a/pico/carthw/svp/svp.c b/pico/carthw/svp/svp.c index 1bccb3f6..fb88fa36 100644 --- a/pico/carthw/svp/svp.c +++ b/pico/carthw/svp/svp.c @@ -30,8 +30,9 @@ #include #include "compiler.h" +#define SVP_CYCLES_LINE 850 + svp_t *svp = NULL; -int PicoSVPCycles = 850; // 
cycles/line, just a guess static int svp_dyn_ready = 0; /* save state stuff */ @@ -57,7 +58,7 @@ static void PicoSVPReset(void) memcpy(svp->iram_rom + 0x800, Pico.rom + 0x800, 0x20000 - 0x800); ssp1601_reset(&svp->ssp1601); #ifdef _SVP_DRC - if ((PicoOpt & POPT_EN_DRC) && svp_dyn_ready) + if ((PicoIn.opt & POPT_EN_DRC) && svp_dyn_ready) ssp1601_dyn_reset(&svp->ssp1601); #endif } @@ -77,37 +78,34 @@ static void PicoSVPLine(void) #endif #ifdef _SVP_DRC - if ((PicoOpt & POPT_EN_DRC) && svp_dyn_ready) - ssp1601_dyn_run(PicoSVPCycles * count); + if ((PicoIn.opt & POPT_EN_DRC) && svp_dyn_ready) + ssp1601_dyn_run(SVP_CYCLES_LINE * count); else #endif { - ssp1601_run(PicoSVPCycles * count); + ssp1601_run(SVP_CYCLES_LINE * count); svp_dyn_ready = 0; // just in case } // test mode - //if (Pico.m.frame_count == 13) PicoPad[0] |= 0xff; + //if (Pico.m.frame_count == 13) PicoIn.pad[0] |= 0xff; } -static int PicoSVPDma(unsigned int source, int len, unsigned short **srcp, unsigned short **limitp) +static int PicoSVPDma(u32 source, int len, unsigned short **base, u32 *mask) { if (source < Pico.romsize) // Rom { - source -= 2; - *srcp = (unsigned short *)(Pico.rom + (source&~1)); - *limitp = (unsigned short *)(Pico.rom + Pico.romsize); - return 1; + *base = (unsigned short *)(Pico.rom + (source & 0xfe0000)); + *mask = 0x1ffff; + return source - 2; } else if ((source & 0xfe0000) == 0x300000) { elprintf(EL_VDPDMA|EL_SVP, "SVP DmaSlow from %06x, len=%i", source, len); - source &= 0x1fffe; - source -= 2; - *srcp = (unsigned short *)(svp->dram + source); - *limitp = (unsigned short *)(svp->dram + sizeof(svp->dram)); - return 1; + *base = (unsigned short *)svp->dram; + *mask = 0x1ffff; + return source - 2; } else elprintf(EL_VDPDMA|EL_SVP|EL_ANOMALY, "SVP FIXME unhandled DmaSlow from %06x, len=%i", source, len); @@ -151,7 +149,7 @@ void PicoSVPStartup(void) // init SVP compiler svp_dyn_ready = 0; #ifdef _SVP_DRC - if (PicoOpt & POPT_EN_DRC) { + if (PicoIn.opt & POPT_EN_DRC) { if 
(ssp1601_dyn_startup()) return; svp_dyn_ready = 1; @@ -170,6 +168,6 @@ void PicoSVPStartup(void) svp_states[1].ptr = svp->dram; svp_states[2].ptr = &svp->ssp1601; carthw_chunks = svp_states; - PicoAHW |= PAHW_SVP; + PicoIn.AHW |= PAHW_SVP; } diff --git a/pico/carthw_cfg.c b/pico/carthw_cfg.c index 91ddec74..8c46a5fa 100644 --- a/pico/carthw_cfg.c +++ b/pico/carthw_cfg.c @@ -1,4 +1,4 @@ -/* generated by ./tools/make_carthw_c, do not modify */ +/* generated by tools/make_carthw_c, do not modify */ static const char builtin_carthw_cfg[] = "[]\n" "check_str=0x150,\"Virtua Racing\"\n" @@ -9,12 +9,28 @@ static const char builtin_carthw_cfg[] = "check_str=0x810,\"OHMP\"\n" "hw=svp\n" "[]\n" - "check_str=0x100,\"SEGA PICO\"\n" + "check_str=0x100,\"SEGA IAC \"\n" "hw=pico\n" "[]\n" - "check_str=0x100,\"IMA IKUNOUJYUKU\"\n" + "check_str=0x100,\"IMA IKUNO\"\n" "hw=pico\n" "[]\n" + "check_str=0x120,\"32X SAMPLE PROGRAM\"\n" + "check_str=0x32b74c,\"Bishop Level\"\n" + "prop=force_6btn\n" + "[]\n" + "check_str=0x100,\"SEGA 32X\"\n" + "check_str=0x150,\"WWF RAW\"\n" + "prop=wwfraw_hack\n" + "[]\n" + "check_str=0x100,\"SEGA 32X\"\n" + "check_str=0x120,\"BLACKTHORNE\"\n" + "prop=blackthorne_hack\n" + "[]\n" + "check_str=0x100,\"SEGA\"\n" + "check_str=0x150,\"MARS CHECK PROGRAM\"\n" + "prop=marscheck_hack\n" + "[]\n" "check_str=0x120,\"PUGGSY\"\n" "prop=no_sram\n" "[]\n" @@ -24,9 +40,11 @@ static const char builtin_carthw_cfg[] = "check_str=0x150,\"MICRO MACHINES II\"\n" "prop=filled_sram\n" "[]\n" - "check_str=0x150,\"32X SAMPLE PROGRAM\"\n" - "check_str=0x32b74c,\"Bishop Level\"\n" - "prop=force_6btn\n" + "check_str=0x150,\" HardBall III\"\n" + "sram_range=0x200000,0x20ffff\n" + "[]\n" + "check_str=0x100,\"SEGA SSF\"\n" + "hw=ssf2_mapper\n" "[]\n" "check_str=0x150,\"SUPER STREET FIGHTER2 The New Challengers\"\n" "hw=ssf2_mapper\n" @@ -36,6 +54,18 @@ static const char builtin_carthw_cfg[] = "check_str=0x610,\"Respect\"\n" "hw=piersolar_mapper\n" "[]\n" + "check_str=0x150,\"BEGGAR 
PRINCE\"\n" + "hw=sf001_mapper\n" + "sram_range=0x400000,0x40ffff\n" + "prop=filled_sram\n" + "[]\n" + "check_str=0x150,\"LEGEND OF WUKONG\"\n" + "hw=sf002_mapper\n" + "[]\n" + "check_str=0x150,\"STAR ODYSSEY\"\n" + "hw=sf004_mapper\n" + "sram_range=0x200000,0x207fff\n" + "[]\n" "check_str=0x120,\"FLICKY\"\n" "check_size_gt=0x020000\n" "hw=x_in_1_mapper\n" @@ -48,6 +78,14 @@ static const char builtin_carthw_cfg[] = "check_size_gt=0x080000\n" "hw=x_in_1_mapper\n" "[]\n" + "check_str=0x150,\"TINY TOON ADVENTURES\"\n" + "check_size_gt=0x080000\n" + "hw=x_in_1_mapper\n" + "[]\n" + "check_str=0x160,\"FS MOONWALKER\"\n" + "check_size_gt=0x080000\n" + "hw=x_in_1_mapper\n" + "[]\n" "check_str=0x150,\" SHOVE IT!\"\n" "check_size_gt=0x020000\n" "hw=x_in_1_mapper\n" @@ -60,6 +98,11 @@ static const char builtin_carthw_cfg[] = "check_size_gt=0x100000\n" "hw=radica_mapper\n" "[]\n" + "check_str=0x150,\"STREET FIGHTER\"\n" + "check_str=0x161,\"SPECIAL CHAMPION EDITION\"\n" + "check_size_gt=0x300000\n" + "hw=radica_mapper\n" + "[]\n" "check_str=0x94,\"THE EARTH DEFEND\"\n" "hw=realtec_mapper\n" "[]\n" @@ -77,6 +120,11 @@ static const char builtin_carthw_cfg[] = "eeprom_type=3\n" "eeprom_lines=8,0,0\n" "[]\n" + "check_str=0x150,\"GREATEST HEAVYWEIGHTS\"\n" + "sram_range=0x200000,0x200001\n" + "eeprom_type=1\n" + "eeprom_lines=1,0,0\n" + "[]\n" "check_str=0x150,\"MICRO MACHINES II\"\n" "sram_range=0x300000,0x380001\n" "eeprom_type=2\n" @@ -134,62 +182,182 @@ static const char builtin_carthw_cfg[] = "eeprom_type=1\n" "eeprom_lines=6,7,7\n" "[]\n" + "check_str=0x150,\"MLBPA SPORTS TALK BASEBALL\"\n" + "sram_range=0x200000,0x200001\n" + "eeprom_type=1\n" + "eeprom_lines=1,0,0\n" + "[]\n" + "check_str=0x104,\" are Registered Trademarks\"\n" + "check_crc32=0xc9706e25\n" + "hw=lk3_mapper\n" + "[]\n" + "check_str=0x104,\" are Registered Trademarks\"\n" + "check_crc32=0x4c98cc30\n" + "hw=lk3_mapper\n" + "[]\n" + "check_str=0x104,\" are Registered Trademarks\"\n" + "check_crc32=0x6ec032cb\n" 
+ "hw=lk3_mapper\n" + "[]\n" "check_str=0x104,\" \"\n" - "check_crc32=0x10458e09\n" + "check_crc32=0x0d097f5c\n" + "hw=lk3_mapper\n" + "[]\n" + "check_str=0x104,\" \"\n" + "check_crc32=0xbf7219df\n" + "hw=lk3_mapper\n" + "[]\n" + "check_str=0x104,\" \"\n" + "check_crc32=0xb5b7606e\n" + "hw=lk3_mapper\n" + "[]\n" + "check_str=0x104,\" \"\n" + "check_crc32=0xab3ae5e9\n" + "hw=lk3_mapper\n" + "[]\n" + "check_str=0x104,\" \"\n" + "check_crc32=0x802f53f9\n" + "hw=lk3_mapper\n" + "[]\n" + "check_csum=0\n" + "check_crc32=0xf63b7bdc\n" + "hw=smw64_mapper\n" + "[]\n" + "check_str=0x104,\" \"\n" + "check_crc32=0x50aa5a9b\n" "hw=prot\n" "prot_ro_value16=0xa13000,0xffff00,0x28\n" "[]\n" - "check_str=0x172,\"GAME : ELF WOR\"\n" + "check_csum=0\n" + "check_crc32=0xee20be2c\n" "hw=prot\n" - "prot_ro_value16=0x400000,-2,0x5500\n" - "prot_ro_value16=0x400002,-2,0xc900#checkisdoneiftheaboveonefails\n" - "prot_ro_value16=0x400004,-2,0x0f00\n" - "prot_ro_value16=0x400006,-2,0x1800#similartoabove\n" + "prot_ro_value16=0xa13000,-2,0x0c\n" + "[]\n" + "check_str=0x104,\"SEGASEGASEGA\"\n" + "check_crc32=0xc3616596\n" + "hw=prot\n" + "prot_ro_value16=0xa13000,-2,0x1c\n" + "[]\n" + "check_str=0x104,\" are Registered Trademarks\"\n" + "check_crc32=0x7861fb28\n" + "hw=prot\n" + "prot_ro_value16=0xa13000,-2,0x0a\n" "[]\n" "check_str=0x104,\" \"\n" - "check_crc32=0xcbc38eea\n" + "check_crc32=0xf4cb9b37\n" + "hw=prot\n" + "prot_ro_value16=0xa13000,-2,0x00\n" + "prot_ro_value16=0xa13002,-2,0x01\n" + "prot_ro_value16=0xa1303e,-2,0x1f\n" + "[]\n" + "check_str=0x104,\" \"\n" + "check_crc32=0x7bdfb390\n" + "hw=prot\n" + "prot_ro_value16=0xa13000,-2,0x00\n" + "prot_ro_value16=0xa13002,-2,0x01\n" + "prot_ro_value16=0xa1303e,-2,0x1f\n" + "[]\n" + "check_str=0x104,\" \"\n" + "check_crc32=0x8fb8b29e\n" "hw=prot\n" "prot_ro_value16=0x480000,0xff0000,0xaa00\n" "prot_ro_value16=0x4a0000,0xff0000,0x0a00\n" "prot_ro_value16=0x4c0000,0xff0000,0xf000\n" - 
"prot_ro_value16=0x400000,0xc00000,0x0000#defaultfor400000-7f0000\n" + "prot_ro_value16=0x400000,0xc00000,0x0000\n" "[]\n" - "check_str=0x104,\" are Registered Trademarks\"\n" - "check_crc32=0xc004219d\n" - "hw=prot_lk3\n" - "[]\n" - "check_str=0x104,\" are Registered Trademarks\"\n" - "check_crc32=0xaff46765\n" - "hw=prot\n" - "prot_rw_value16=0x400000,0xc00004,0\n" - "prot_rw_value16=0x400004,0xc00004,0\n" - "[]\n" - "check_str=0x118,\"CREATON. \"\n" - "check_crc32=0xddd02ba4\n" + "check_str=0x104,\" MEGA DRIVE (C)\"\n" + "check_str=0x118,\"CREATON.\"\n" + "check_str=0x180,\"MDGM-000\"\n" "hw=prot\n" "prot_ro_value16=0x400000,-2,0x9000\n" "prot_ro_value16=0x401000,-2,0xd300\n" "[]\n" - "check_str=0x104,\" \"\n" - "check_crc32=0xf68f6367\n" + "check_csum=0x6cca\n" + "check_crc32=0xab5d5d9e\n" "hw=prot\n" - "prot_ro_value16=0xa13002,-2,0x01\n" - "prot_ro_value16=0xa1303e,-2,0x1f\n" + "prot_ro_value16=0x500008,-2,0x5000\n" "[]\n" - "check_str=0x104,\" \"\n" - "check_crc32=0xfb176667\n" + "check_str=0x113,\"KANKO 91-92\"\n" + "check_crc32=0x79423515\n" "hw=prot\n" - "prot_ro_value16=0xa13000,-2,0x14\n" - "prot_ro_value16=0xa13002,-2,0x01\n" - "prot_ro_value16=0xa1303e,-2,0x1f\n" + "prot_ro_value16=0x500008,-2,0x5000\n" + "prot_ro_value16=0x500208,-2,0xa000\n" + "[]\n" + "check_str=0x104,\" are Registered Trademarks\"\n" + "check_crc32=0x7009cac3\n" + "hw=prot\n" + "prot_rw_value16=0x400000,0xc00004,0\n" + "prot_rw_value16=0x400004,0xc00004,0\n" + "[]\n" + "check_str=0x104,\" are Registered Trademarks\"\n" + "check_crc32=0x1c602dd4\n" + "hw=prot\n" + "prot_rw_value16=0x400000,0xc00000,0\n" + "prot_rw_value16=0x400004,0xc00004,0\n" + "[]\n" + "check_str=0x104,\" are Registered Trademarks\"\n" + "check_crc32=0xc31cfcca\n" + "hw=prot\n" + "prot_rw_value16=0x400000,0xc00000,0\n" + "prot_rw_value16=0x400004,0xc00004,0\n" + "[]\n" + "check_csum=0x30b9\n" + "check_crc32=0x35e0ff17\n" + "hw=prot\n" + "prot_rw_value16=0x400000,0xc00000,0\n" + 
"prot_rw_value16=0x400004,0xc00004,0\n" + "[]\n" + "check_csum=0x5ff9\n" + "check_crc32=0x4b2b163a\n" + "hw=prot\n" + "prot_rw_value16=0x400000,0xc00000,0\n" + "prot_rw_value16=0x400004,0xc00004,0\n" + "[]\n" + "check_csum=0x6001\n" + "check_crc32=0xfa80956a\n" + "hw=prot\n" + "prot_rw_value16=0x400000,0xc00000,0\n" + "prot_rw_value16=0x400004,0xc00004,0\n" + "[]\n" + "check_csum=0xffff\n" + "check_crc32=0x91865ea4\n" + "hw=prot\n" + "prot_rw_value16=0x400000,0xc00000,0\n" + "prot_rw_value16=0x400004,0xc00004,0\n" + "[]\n" + "check_csum=0xffff\n" + "check_crc32=0x8e83dbfa\n" + "hw=prot\n" + "prot_ro_value16=0x400000,-2,0x6300\n" + "prot_ro_value16=0x400002,-2,0x9800\n" + "prot_ro_value16=0x400004,-2,0xc900\n" + "prot_ro_value16=0x400006,-2,0x1800\n" + "[]\n" + "check_str=0x104,\" are Registered Trademarks\"\n" + "check_crc32=0xf838aa3b\n" + "hw=prot\n" + "prot_ro_value16=0x400000,-2,0x6300\n" + "prot_ro_value16=0x400002,-2,0x9800\n" + "prot_ro_value16=0x400004,-2,0xc900\n" + "prot_ro_value16=0x400006,-2,0x1800\n" + "[]\n" + "check_str=0x172,\"GAME : ELF WOR\"\n" + "hw=prot\n" + "prot_ro_value16=0x400000,-2,0x5500\n" + "prot_ro_value16=0x400002,-2,0x0f00\n" + "prot_ro_value16=0x400004,-2,0xc900\n" + "prot_ro_value16=0x400006,-2,0x1800\n" + "[]\n" + "check_str=0x104,\" \"\n" + "check_crc32=0x5fdeb37b\n" + "hw=prot\n" + "prot_ro_value16=0x400000,-2,0x5500\n" + "prot_ro_value16=0x400002,-2,0x0f00\n" + "prot_ro_value16=0x400004,-2,0xc900\n" + "prot_ro_value16=0x400006,-2,0x1800\n" "[]\n" "check_csum=0\n" - "check_crc32=0x3ee639f0\n" - "hw=prot\n" - "prot_ro_value16=0xa13000,-2,0x0c\n" - "[]\n" - "check_csum=0\n" - "check_crc32=0xdecdf740\n" + "check_crc32=0xc9539fce\n" "hw=prot\n" "prot_ro_value16=0x400000,-2,0x5500\n" "prot_ro_value16=0x400002,-2,0x0f00\n" @@ -197,33 +365,63 @@ static const char builtin_carthw_cfg[] = "prot_ro_value16=0x400006,-2,0xf000\n" "[]\n" "check_str=0x104,\" \"\n" - "check_crc32=0xf26f88d1\n" + "check_crc32=0x6a95f766\n" "hw=prot\n" + 
"prot_ro_value16=0x400000,-2,0x6300\n" "prot_ro_value16=0x400002,-2,0x9800\n" - "prot_ro_value16=0x400004,-2,0xaa00#or0xc900\n" + "prot_ro_value16=0x400004,-2,0xaa00\n" "prot_ro_value16=0x400006,-2,0xf000\n" "[]\n" "check_str=0x104,\" are Registered Trademarks\"\n" - "check_crc32=0x4820a161\n" + "check_crc32=0xf93f3d0b\n" "hw=prot\n" "prot_ro_value16=0x400000,-2,0x5500\n" "prot_ro_value16=0x400002,-2,0x0f00\n" "[]\n" "check_str=0x104,\" are Registered Trademarks\"\n" - "check_crc32=0x413dfee2\n" - "hw=prot_lk3\n" - "[]\n" - "check_str=0x140,\"SUPER MARIO BROS \"\n" + "check_crc32=0x66165305\n" "hw=prot\n" - "prot_ro_value16=0xa13000,-2,0x0c\n" + "prot_ro_value16=0x400000,-2,0x5500\n" + "prot_ro_value16=0x400002,-2,0x0f00\n" + "prot_ro_value16=0x400004,-2,0xaa00\n" + "prot_ro_value16=0x400006,-2,0xf000\n" "[]\n" - "check_str=0x104,\" are Registered Trademarks\"\n" - "check_crc32=0xf7e1b3e1\n" + "check_str=0xfe,\"WISEGAME\"\n" + "check_crc32=0x6f01bd65\n" "hw=prot\n" - "prot_ro_value16=0xa13000,-2,0x0a\n" + "prot_ro_value16=0x400000,-2,0x5500\n" + "prot_ro_value16=0x400002,-2,0x0f00\n" + "prot_ro_value16=0x400004,-2,0xaa00\n" + "prot_ro_value16=0x400006,-2,0xf000\n" "[]\n" - "check_str=0x104,\" are Registered Trademarks\"\n" - "check_crc32=0xb8261ff5\n" + "check_str=0xfe,\"WISEGAME IS TRADE MARKER\"\n" "hw=prot\n" - "prot_rw_value16=0x400000,0xc00000,0\n" + "prot_ro_value16=0x400002,-2,0xaa00\n" + "prot_ro_value16=0x400004,-2,0xc900\n" + "prot_ro_value16=0x400006,-2,0xf000\n" + "[]\n" + "check_csum=0xffff\n" + "check_crc32=0xdd02797c\n" + "hw=prot\n" + "prot_ro_value16=0x400000,-2,0x5500\n" + "prot_ro_value16=0x400002,-2,0x0f00\n" + "prot_ro_value16=0x400004,-2,0xaa00\n" + "prot_ro_value16=0x400006,-2,0xf000\n" + "[]\n" + "check_csum=0\n" + "check_crc32=0xee9fc429\n" + "hw=prot\n" + "prot_ro_value16=0x400000,-2,0x6300\n" + "[]\n" + "check_str=0x180,\"GM 00000000-00\"\n" + "check_crc32=0x6732aab4\n" + "prop=no_z80_bus_lock\n" + "[]\n" + "check_str=0x180,\"GM 
00000000-00\"\n" + "check_crc32=0xe302585a\n" + "prop=no_z80_bus_lock\n" + "[]\n" + "check_csum=8224\n" + "check_crc32=0x20ed0de8\n" + "prop=no_z80_bus_lock\n" ; diff --git a/pico/cd/LC89510.c b/pico/cd/LC89510.c deleted file mode 100644 index 74894760..00000000 --- a/pico/cd/LC89510.c +++ /dev/null @@ -1,637 +0,0 @@ -/*********************************************************** - * * - * This source file was taken from the Gens project * - * Written by Stéphane Dallongeville * - * Copyright (c) 2002 by Stéphane Dallongeville * - * Modified/adapted for PicoDrive by notaz, 2007 * - * * - ***********************************************************/ - -#include "../pico_int.h" - -#define CDC_DMA_SPEED 256 - - -static void CDD_Reset(void) -{ - // Reseting CDD - - memset(Pico_mcd->s68k_regs+0x34, 0, 2*2); // CDD.Fader, CDD.Control - Pico_mcd->cdd.Status = 0; - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - // clear receive status and transfer command - memset(Pico_mcd->s68k_regs+0x38, 0, 20); - Pico_mcd->s68k_regs[0x38+9] = 0xF; // Default checksum -} - - -static void CDC_Reset(void) -{ - // Reseting CDC - - memset(Pico_mcd->cdc.Buffer, 0, sizeof(Pico_mcd->cdc.Buffer)); - - Pico_mcd->cdc.COMIN = 0; - Pico_mcd->cdc.IFSTAT = 0xFF; - Pico_mcd->cdc.DAC.N = 0; - Pico_mcd->cdc.DBC.N = 0; - Pico_mcd->cdc.HEAD.N = 0x01000000; - Pico_mcd->cdc.PT.N = 0; - Pico_mcd->cdc.WA.N = 2352 * 2; - Pico_mcd->cdc.STAT.N = 0x00000080; - Pico_mcd->cdc.SBOUT = 0; - Pico_mcd->cdc.IFCTRL = 0; - Pico_mcd->cdc.CTRL.N = 0; - - Pico_mcd->cdc.Decode_Reg_Read = 0; - Pico_mcd->scd.Status_CDC &= ~0x08; -} - - -PICO_INTERNAL void LC89510_Reset(void) -{ - CDD_Reset(); - CDC_Reset(); - - // clear DMA_Adr & Stop_Watch - memset(Pico_mcd->s68k_regs + 0xA, 0, 4); -} - - -PICO_INTERNAL void Update_CDC_TRansfer(int which) -{ - unsigned int DMA_Adr, dep, length; - unsigned short *dest; - unsigned char *src; - - if (1) //Pico_mcd->cdc.DBC.N <= 
(CDC_DMA_SPEED * 2)) - { - length = (Pico_mcd->cdc.DBC.N + 1) >> 1; - Pico_mcd->scd.Status_CDC &= ~0x08; // Last transfer - Pico_mcd->s68k_regs[4] |= 0x80; // End data transfer - Pico_mcd->s68k_regs[4] &= ~0x40; // no more data ready - Pico_mcd->cdc.IFSTAT |= 0x08; // No more data transfer in progress - - if (Pico_mcd->cdc.IFCTRL & 0x40) // DTEIEN = Data Trasnfer End Interrupt Enable ? - { - Pico_mcd->cdc.IFSTAT &= ~0x40; - - if (Pico_mcd->s68k_regs[0x33] & PCDS_IEN5) - { - elprintf(EL_INTS, "cdc DTE irq 5"); - SekInterruptS68k(5); - } - } - } - else length = CDC_DMA_SPEED; - - - // TODO: dst bounds checking? - src = Pico_mcd->cdc.Buffer + Pico_mcd->cdc.DAC.N; - DMA_Adr = (Pico_mcd->s68k_regs[0xA]<<8) | Pico_mcd->s68k_regs[0xB]; - - if (which == 7) // WORD RAM - { - if (Pico_mcd->s68k_regs[3] & 4) - { - // test: Final Fight - int bank = !(Pico_mcd->s68k_regs[3]&1); - dep = ((DMA_Adr & 0x3FFF) << 3); - cdprintf("CD DMA # %04x -> word_ram1M # %06x, len=%i", - Pico_mcd->cdc.DAC.N, dep, length); - - dest = (unsigned short *) (Pico_mcd->word_ram1M[bank] + dep); - - memcpy16bswap(dest, src, length); - - /*{ // debug - unsigned char *b1 = Pico_mcd->word_ram1M[bank] + dep; - unsigned char *b2 = (unsigned char *)(dest+length) - 8; - dprintf("%02x %02x %02x %02x .. %02x %02x %02x %02x", - b1[0], b1[1], b1[4], b1[5], b2[0], b2[1], b2[4], b2[5]); - }*/ - } - else - { - dep = ((DMA_Adr & 0x7FFF) << 3); - cdprintf("CD DMA # %04x -> word_ram2M # %06x, len=%i", - Pico_mcd->cdc.DAC.N, dep, length); - dest = (unsigned short *) (Pico_mcd->word_ram2M + dep); - - memcpy16bswap(dest, src, length); - - /*{ // debug - unsigned char *b1 = Pico_mcd->word_ram2M + dep; - unsigned char *b2 = (unsigned char *)(dest+length) - 4; - dprintf("%02x %02x %02x %02x .. 
%02x %02x %02x %02x", - b1[0], b1[1], b1[2], b1[3], b2[0], b2[1], b2[2], b2[3]); - }*/ - } - } - else if (which == 4) // PCM RAM (check: popful Mail) - { - dep = (DMA_Adr & 0x03FF) << 2; - cdprintf("CD DMA # %04x -> PCM[%i] # %04x, len=%i", - Pico_mcd->cdc.DAC.N, Pico_mcd->pcm.bank, dep, length); - dest = (unsigned short *) (Pico_mcd->pcm_ram_b[Pico_mcd->pcm.bank] + dep); - - if (Pico_mcd->cdc.DAC.N & 1) /* unaligned src? */ - memcpy(dest, src, length*2); - else memcpy16(dest, (unsigned short *) src, length); - } - else if (which == 5) // PRG RAM - { - dep = DMA_Adr << 3; - dest = (unsigned short *) (Pico_mcd->prg_ram + dep); - cdprintf("CD DMA # %04x -> prg_ram # %06x, len=%i", - Pico_mcd->cdc.DAC.N, dep, length); - - memcpy16bswap(dest, src, length); - - /*{ // debug - unsigned char *b1 = Pico_mcd->prg_ram + dep; - unsigned char *b2 = (unsigned char *)(dest+length) - 4; - dprintf("%02x %02x %02x %02x .. %02x %02x %02x %02x", - b1[0], b1[1], b1[2], b1[3], b2[0], b2[1], b2[2], b2[3]); - }*/ - } - - length <<= 1; - Pico_mcd->cdc.DAC.N = (Pico_mcd->cdc.DAC.N + length) & 0xFFFF; - if (Pico_mcd->scd.Status_CDC & 0x08) Pico_mcd->cdc.DBC.N -= length; - else Pico_mcd->cdc.DBC.N = 0; - - // update DMA_Adr - length >>= 2; - if (which != 4) length >>= 1; - DMA_Adr += length; - Pico_mcd->s68k_regs[0xA] = DMA_Adr >> 8; - Pico_mcd->s68k_regs[0xB] = DMA_Adr; -} - - -PICO_INTERNAL_ASM unsigned short Read_CDC_Host(int is_sub) -{ - int addr; - - if (!(Pico_mcd->scd.Status_CDC & 0x08)) - { - // Transfer data disabled - cdprintf("Read_CDC_Host FIXME: Transfer data disabled"); - return 0; - } - - if ((is_sub && (Pico_mcd->s68k_regs[4] & 7) != 3) || - (!is_sub && (Pico_mcd->s68k_regs[4] & 7) != 2)) - { - // Wrong setting - cdprintf("Read_CDC_Host FIXME: Wrong setting"); - return 0; - } - - Pico_mcd->cdc.DBC.N -= 2; - - if (Pico_mcd->cdc.DBC.N <= 0) - { - Pico_mcd->cdc.DBC.N = 0; - Pico_mcd->scd.Status_CDC &= ~0x08; // Last transfer - Pico_mcd->s68k_regs[4] |= 0x80; // End data transfer 
- Pico_mcd->s68k_regs[4] &= ~0x40; // no more data ready - Pico_mcd->cdc.IFSTAT |= 0x08; // No more data transfer in progress - - if (Pico_mcd->cdc.IFCTRL & 0x40) // DTEIEN = Data Transfer End Interrupt Enable ? - { - Pico_mcd->cdc.IFSTAT &= ~0x40; - - if (Pico_mcd->s68k_regs[0x33]&(1<<5)) { - elprintf(EL_INTS, "m68k: s68k irq 5"); - SekInterruptS68k(5); - } - - cdprintf("CDC - DTE interrupt"); - } - } - - addr = Pico_mcd->cdc.DAC.N; - Pico_mcd->cdc.DAC.N += 2; - - cdprintf("Read_CDC_Host sub=%i d=%04x dac=%04x dbc=%04x", is_sub, - (Pico_mcd->cdc.Buffer[addr]<<8) | Pico_mcd->cdc.Buffer[addr+1], Pico_mcd->cdc.DAC.N, Pico_mcd->cdc.DBC.N); - - return (Pico_mcd->cdc.Buffer[addr]<<8) | Pico_mcd->cdc.Buffer[addr+1]; -} - - -PICO_INTERNAL void CDC_Update_Header(void) -{ - if (Pico_mcd->cdc.CTRL.B.B1 & 0x01) // Sub-Header wanted ? - { - Pico_mcd->cdc.HEAD.B.B0 = 0; - Pico_mcd->cdc.HEAD.B.B1 = 0; - Pico_mcd->cdc.HEAD.B.B2 = 0; - Pico_mcd->cdc.HEAD.B.B3 = 0; - } - else - { - _msf MSF; - - LBA_to_MSF(Pico_mcd->scd.Cur_LBA, &MSF); - - Pico_mcd->cdc.HEAD.B.B0 = INT_TO_BCDB(MSF.M); - Pico_mcd->cdc.HEAD.B.B1 = INT_TO_BCDB(MSF.S); - Pico_mcd->cdc.HEAD.B.B2 = INT_TO_BCDB(MSF.F); - Pico_mcd->cdc.HEAD.B.B3 = 0x01; - } -} - - -PICO_INTERNAL unsigned char CDC_Read_Reg(void) -{ - unsigned char ret; - - switch(Pico_mcd->s68k_regs[5] & 0xF) - { - case 0x0: // COMIN - cdprintf("CDC read reg 00 = %.2X", Pico_mcd->cdc.COMIN); - - Pico_mcd->s68k_regs[5] = 0x1; - return Pico_mcd->cdc.COMIN; - - case 0x1: // IFSTAT - cdprintf("CDC read reg 01 = %.2X", Pico_mcd->cdc.IFSTAT); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 1); // Reg 1 (decoding) - Pico_mcd->s68k_regs[5] = 0x2; - return Pico_mcd->cdc.IFSTAT; - - case 0x2: // DBCL - cdprintf("CDC read reg 02 = %.2X", Pico_mcd->cdc.DBC.B.L); - - Pico_mcd->s68k_regs[5] = 0x3; - return Pico_mcd->cdc.DBC.B.L; - - case 0x3: // DBCH - cdprintf("CDC read reg 03 = %.2X", Pico_mcd->cdc.DBC.B.H); - - Pico_mcd->s68k_regs[5] = 0x4; - return Pico_mcd->cdc.DBC.B.H; 
- - case 0x4: // HEAD0 - cdprintf("CDC read reg 04 = %.2X", Pico_mcd->cdc.HEAD.B.B0); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 4); // Reg 4 (decoding) - Pico_mcd->s68k_regs[5] = 0x5; - return Pico_mcd->cdc.HEAD.B.B0; - - case 0x5: // HEAD1 - cdprintf("CDC read reg 05 = %.2X", Pico_mcd->cdc.HEAD.B.B1); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 5); // Reg 5 (decoding) - Pico_mcd->s68k_regs[5] = 0x6; - return Pico_mcd->cdc.HEAD.B.B1; - - case 0x6: // HEAD2 - cdprintf("CDC read reg 06 = %.2X", Pico_mcd->cdc.HEAD.B.B2); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 6); // Reg 6 (decoding) - Pico_mcd->s68k_regs[5] = 0x7; - return Pico_mcd->cdc.HEAD.B.B2; - - case 0x7: // HEAD3 - cdprintf("CDC read reg 07 = %.2X", Pico_mcd->cdc.HEAD.B.B3); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 7); // Reg 7 (decoding) - Pico_mcd->s68k_regs[5] = 0x8; - return Pico_mcd->cdc.HEAD.B.B3; - - case 0x8: // PTL - cdprintf("CDC read reg 08 = %.2X", Pico_mcd->cdc.PT.B.L); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 8); // Reg 8 (decoding) - Pico_mcd->s68k_regs[5] = 0x9; - return Pico_mcd->cdc.PT.B.L; - - case 0x9: // PTH - cdprintf("CDC read reg 09 = %.2X", Pico_mcd->cdc.PT.B.H); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 9); // Reg 9 (decoding) - Pico_mcd->s68k_regs[5] = 0xA; - return Pico_mcd->cdc.PT.B.H; - - case 0xA: // WAL - cdprintf("CDC read reg 10 = %.2X", Pico_mcd->cdc.WA.B.L); - - Pico_mcd->s68k_regs[5] = 0xB; - return Pico_mcd->cdc.WA.B.L; - - case 0xB: // WAH - cdprintf("CDC read reg 11 = %.2X", Pico_mcd->cdc.WA.B.H); - - Pico_mcd->s68k_regs[5] = 0xC; - return Pico_mcd->cdc.WA.B.H; - - case 0xC: // STAT0 - cdprintf("CDC read reg 12 = %.2X", Pico_mcd->cdc.STAT.B.B0); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 12); // Reg 12 (decoding) - Pico_mcd->s68k_regs[5] = 0xD; - return Pico_mcd->cdc.STAT.B.B0; - - case 0xD: // STAT1 - cdprintf("CDC read reg 13 = %.2X", Pico_mcd->cdc.STAT.B.B1); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 13); // Reg 13 (decoding) - Pico_mcd->s68k_regs[5] = 0xE; - 
return Pico_mcd->cdc.STAT.B.B1; - - case 0xE: // STAT2 - cdprintf("CDC read reg 14 = %.2X", Pico_mcd->cdc.STAT.B.B2); - - Pico_mcd->cdc.Decode_Reg_Read |= (1 << 14); // Reg 14 (decoding) - Pico_mcd->s68k_regs[5] = 0xF; - return Pico_mcd->cdc.STAT.B.B2; - - case 0xF: // STAT3 - cdprintf("CDC read reg 15 = %.2X", Pico_mcd->cdc.STAT.B.B3); - - ret = Pico_mcd->cdc.STAT.B.B3; - Pico_mcd->cdc.IFSTAT |= 0x20; // decoding interrupt flag cleared - if ((Pico_mcd->cdc.CTRL.B.B0 & 0x80) && (Pico_mcd->cdc.IFCTRL & 0x20)) - { - if ((Pico_mcd->cdc.Decode_Reg_Read & 0x73F2) == 0x73F2) - Pico_mcd->cdc.STAT.B.B3 = 0x80; - } - return ret; - } - - return 0; -} - - -PICO_INTERNAL void CDC_Write_Reg(unsigned char Data) -{ - cdprintf("CDC write reg%02d = %.2X", Pico_mcd->s68k_regs[5] & 0xF, Data); - - switch (Pico_mcd->s68k_regs[5] & 0xF) - { - case 0x0: // SBOUT - Pico_mcd->s68k_regs[5] = 0x1; - Pico_mcd->cdc.SBOUT = Data; - - break; - - case 0x1: // IFCTRL - Pico_mcd->s68k_regs[5] = 0x2; - Pico_mcd->cdc.IFCTRL = Data; - - if ((Pico_mcd->cdc.IFCTRL & 0x02) == 0) // Stop data transfer - { - Pico_mcd->cdc.DBC.N = 0; - Pico_mcd->scd.Status_CDC &= ~0x08; - Pico_mcd->cdc.IFSTAT |= 0x08; // No more data transfer in progress - } - break; - - case 0x2: // DBCL - Pico_mcd->s68k_regs[5] = 0x3; - Pico_mcd->cdc.DBC.B.L = Data; - - break; - - case 0x3: // DBCH - Pico_mcd->s68k_regs[5] = 0x4; - Pico_mcd->cdc.DBC.B.H = Data; - - break; - - case 0x4: // DACL - Pico_mcd->s68k_regs[5] = 0x5; - Pico_mcd->cdc.DAC.B.L = Data; - - break; - - case 0x5: // DACH - Pico_mcd->s68k_regs[5] = 0x6; - Pico_mcd->cdc.DAC.B.H = Data; - - break; - - case 0x6: // DTTRG - if (Pico_mcd->cdc.IFCTRL & 0x02) // Data transfer enable ? 
- { - Pico_mcd->cdc.IFSTAT &= ~0x08; // Data transfer in progress - Pico_mcd->scd.Status_CDC |= 0x08; // Data transfer in progress - Pico_mcd->s68k_regs[4] &= 0x7F; // A data transfer start - - cdprintf("************** Starting Data Transfer ***********"); - cdprintf("RS0 = %.4X DAC = %.4X DBC = %.4X DMA adr = %.4X\n\n", Pico_mcd->s68k_regs[4]<<8, - Pico_mcd->cdc.DAC.N, Pico_mcd->cdc.DBC.N, (Pico_mcd->s68k_regs[0xA]<<8) | Pico_mcd->s68k_regs[0xB]); - - // tmp - { - int ddx = Pico_mcd->s68k_regs[4] & 7; - if (ddx < 2) break; // invalid - if (ddx < 4) { - Pico_mcd->s68k_regs[4] |= 0x40; // Data set ready in host port - break; - } - if (ddx == 6) break; // invalid - - pcd_event_schedule_s68k(PCD_EVENT_DMA, Pico_mcd->cdc.DBC.N / 2); - } - } - break; - - case 0x7: // DTACK - Pico_mcd->cdc.IFSTAT |= 0x40; // end data transfer interrupt flag cleared - break; - - case 0x8: // WAL - Pico_mcd->s68k_regs[5] = 0x9; - Pico_mcd->cdc.WA.B.L = Data; - - break; - - case 0x9: // WAH - Pico_mcd->s68k_regs[5] = 0xA; - Pico_mcd->cdc.WA.B.H = Data; - - break; - - case 0xA: // CTRL0 - Pico_mcd->s68k_regs[5] = 0xB; - Pico_mcd->cdc.CTRL.B.B0 = Data; - - break; - - case 0xB: // CTRL1 - Pico_mcd->s68k_regs[5] = 0xC; - Pico_mcd->cdc.CTRL.B.B1 = Data; - - break; - - case 0xC: // PTL - Pico_mcd->s68k_regs[5] = 0xD; - Pico_mcd->cdc.PT.B.L = Data; - - break; - - case 0xD: // PTH - Pico_mcd->s68k_regs[5] = 0xE; - Pico_mcd->cdc.PT.B.H = Data; - - break; - - case 0xE: // CTRL2 - Pico_mcd->cdc.CTRL.B.B2 = Data; - break; - - case 0xF: // RESET - CDC_Reset(); - break; - } -} - - -static int bswapwrite(int a, unsigned short d) -{ - *(unsigned short *)(Pico_mcd->s68k_regs + a) = (d>>8)|(d<<8); - return d + (d >> 8); -} - -PICO_INTERNAL void CDD_Export_Status(void) -{ - unsigned int csum; - - csum = bswapwrite( 0x38+0, Pico_mcd->cdd.Status); - csum += bswapwrite( 0x38+2, Pico_mcd->cdd.Minute); - csum += bswapwrite( 0x38+4, Pico_mcd->cdd.Seconde); - csum += bswapwrite( 0x38+6, Pico_mcd->cdd.Frame); - 
Pico_mcd->s68k_regs[0x38+8] = Pico_mcd->cdd.Ext; - csum += Pico_mcd->cdd.Ext; - Pico_mcd->s68k_regs[0x38+9] = ~csum & 0xf; - - Pico_mcd->s68k_regs[0x37] &= 3; // CDD.Control - - if (Pico_mcd->s68k_regs[0x33] & PCDS_IEN4) - { - elprintf(EL_INTS, "cdd export irq 4"); - SekInterruptS68k(4); - } - -// cdprintf("CDD exported status\n"); - cdprintf("out: Status=%.4X, Minute=%.4X, Second=%.4X, Frame=%.4X Checksum=%.4X", - (Pico_mcd->s68k_regs[0x38+0] << 8) | Pico_mcd->s68k_regs[0x38+1], - (Pico_mcd->s68k_regs[0x38+2] << 8) | Pico_mcd->s68k_regs[0x38+3], - (Pico_mcd->s68k_regs[0x38+4] << 8) | Pico_mcd->s68k_regs[0x38+5], - (Pico_mcd->s68k_regs[0x38+6] << 8) | Pico_mcd->s68k_regs[0x38+7], - (Pico_mcd->s68k_regs[0x38+8] << 8) | Pico_mcd->s68k_regs[0x38+9]); -} - - -PICO_INTERNAL void CDD_Import_Command(void) -{ -// cdprintf("CDD importing command\n"); - cdprintf("in: Command=%.4X, Minute=%.4X, Second=%.4X, Frame=%.4X Checksum=%.4X", - (Pico_mcd->s68k_regs[0x38+10+0] << 8) | Pico_mcd->s68k_regs[0x38+10+1], - (Pico_mcd->s68k_regs[0x38+10+2] << 8) | Pico_mcd->s68k_regs[0x38+10+3], - (Pico_mcd->s68k_regs[0x38+10+4] << 8) | Pico_mcd->s68k_regs[0x38+10+5], - (Pico_mcd->s68k_regs[0x38+10+6] << 8) | Pico_mcd->s68k_regs[0x38+10+7], - (Pico_mcd->s68k_regs[0x38+10+8] << 8) | Pico_mcd->s68k_regs[0x38+10+9]); - - switch (Pico_mcd->s68k_regs[0x38+10+0]) - { - case 0x0: // STATUS (?) - Get_Status_CDD_c0(); - break; - - case 0x1: // STOP ALL (?) 
- Stop_CDD_c1(); - break; - - case 0x2: // GET TOC INFORMATIONS - switch(Pico_mcd->s68k_regs[0x38+10+3]) - { - case 0x0: // get current position (MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00); - Get_Pos_CDD_c20(); - break; - - case 0x1: // get elapsed time of current track played/scanned (relative MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 1; - Get_Track_Pos_CDD_c21(); - break; - - case 0x2: // get current track in RS2-RS3 - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 2; - Get_Current_Track_CDD_c22(); - break; - - case 0x3: // get total length (MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 3; - Get_Total_Lenght_CDD_c23(); - break; - - case 0x4: // first & last track number - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 4; - Get_First_Last_Track_CDD_c24(); - break; - - case 0x5: // get track addresse (MSF format) - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 5; - Get_Track_Adr_CDD_c25(); - break; - - default : // invalid, then we return status - Pico_mcd->cdd.Status = (Pico_mcd->cdd.Status & 0xFF00) | 0xF; - Get_Status_CDD_c0(); - break; - } - break; - - case 0x3: // READ - Play_CDD_c3(); - break; - - case 0x4: // SEEK - Seek_CDD_c4(); - break; - - case 0x6: // PAUSE/STOP - Pause_CDD_c6(); - break; - - case 0x7: // RESUME - Resume_CDD_c7(); - break; - - case 0x8: // FAST FOWARD - Fast_Foward_CDD_c8(); - break; - - case 0x9: // FAST REWIND - Fast_Rewind_CDD_c9(); - break; - - case 0xA: // RECOVER INITIAL STATE (?) 
- CDD_cA(); - break; - - case 0xC: // CLOSE TRAY - Close_Tray_CDD_cC(); - break; - - case 0xD: // OPEN TRAY - Open_Tray_CDD_cD(); - break; - - default: // UNKNOWN - CDD_Def(); - break; - } -} - diff --git a/pico/cd/LC89510.h b/pico/cd/LC89510.h deleted file mode 100644 index 2b0d3826..00000000 --- a/pico/cd/LC89510.h +++ /dev/null @@ -1,135 +0,0 @@ -/*********************************************************** - * * - * This source was taken from the Gens project * - * Written by Stéphane Dallongeville * - * Copyright (c) 2002 by Stéphane Dallongeville * - * Modified/adapted for PicoDrive by notaz, 2007 * - * * - ***********************************************************/ - -#ifndef _LC89510_H -#define _LC89510_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct -{ - unsigned char Buffer[(32 * 1024 * 2) + 2352]; -// unsigned int Host_Data; // unused -// unsigned int DMA_Adr; // 0A -// unsigned int Stop_Watch; // 0C - unsigned int COMIN; - unsigned int IFSTAT; - union - { - struct - { - unsigned char L; - unsigned char H; - unsigned short unused; - } B; - int N; - } DBC; - union - { - struct - { - unsigned char L; - unsigned char H; - unsigned short unused; - } B; - int N; - } DAC; - union - { - struct - { - unsigned char B0; - unsigned char B1; - unsigned char B2; - unsigned char B3; - } B; - unsigned int N; - } HEAD; - union - { - struct - { - unsigned char L; - unsigned char H; - unsigned short unused; - } B; - int N; - } PT; - union - { - struct - { - unsigned char L; - unsigned char H; - unsigned short unused; - } B; - int N; - } WA; - union - { - struct - { - unsigned char B0; - unsigned char B1; - unsigned char B2; - unsigned char B3; - } B; - unsigned int N; - } STAT; - unsigned int SBOUT; - unsigned int IFCTRL; - union - { - struct - { - unsigned char B0; - unsigned char B1; - unsigned char B2; - unsigned char B3; - } B; - unsigned int N; - } CTRL; - unsigned int Decode_Reg_Read; -} CDC; - -typedef struct -{ -// unsigned short Fader; // 34 -// 
unsigned short Control; // 36 -// unsigned short Cur_Comm;// unused - - // "Receive status" - unsigned short Status; - unsigned short Minute; - unsigned short Seconde; - unsigned short Frame; - unsigned char Ext; - unsigned char pad[3]; -} CDD; - - -PICO_INTERNAL_ASM unsigned short Read_CDC_Host(int is_sub); -PICO_INTERNAL void LC89510_Reset(void); -PICO_INTERNAL void Update_CDC_TRansfer(int which); -PICO_INTERNAL void CDC_Update_Header(void); - -PICO_INTERNAL unsigned char CDC_Read_Reg(void); -PICO_INTERNAL void CDC_Write_Reg(unsigned char Data); - -PICO_INTERNAL void CDD_Export_Status(void); -PICO_INTERNAL void CDD_Import_Command(void); - -#ifdef __cplusplus -}; -#endif - -#endif - diff --git a/pico/cd/buffering.c b/pico/cd/buffering.c deleted file mode 100644 index 84203369..00000000 --- a/pico/cd/buffering.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Buffering handling - * (C) notaz, 2007,2008 - * - * This work is licensed under the terms of MAME license. - * See COPYING file in the top-level directory. - */ - -#include "../pico_int.h" -#include "../cd/cue.h" - -int PicoCDBuffers = 0; -static unsigned char *cd_buffer = NULL; -static int prev_lba = 0x80000000; - -static int hits, reads; - -#undef dprintf -#define dprintf(...) 
- -void PicoCDBufferInit(void) -{ - void *tmp = NULL; - - prev_lba = 0x80000000; - hits = reads = 0; - - if (PicoCDBuffers <= 1) { - PicoCDBuffers = 0; - return; /* buffering off */ - } - - /* try alloc'ing until we succeed */ - while (PicoCDBuffers > 0) - { - tmp = realloc(cd_buffer, PicoCDBuffers * 2048 + 304); - if (tmp != NULL) break; - PicoCDBuffers >>= 1; - } - - if (PicoCDBuffers <= 0) return; /* buffering became off */ - - cd_buffer = tmp; -} - - -void PicoCDBufferFree(void) -{ - if (cd_buffer) { - free(cd_buffer); - cd_buffer = NULL; - } - if (reads) - elprintf(EL_STATUS, "CD buffer hits: %i/%i (%i%%)\n", hits, reads, hits * 100 / reads); -} - - -void PicoCDBufferFlush(void) -{ - prev_lba = 0x80000000; -} - - -/* this is was a try to fight slow SD access of GP2X */ -PICO_INTERNAL void PicoCDBufferRead(void *dest, int lba) -{ - int is_bin, offs, read_len, moved = 0; - reads++; - - is_bin = Pico_mcd->TOC.Tracks[0].ftype == CT_BIN; - - if (PicoCDBuffers <= 0) - { - /* no buffering */ - int where_seek = is_bin ? (lba * 2352 + 16) : (lba << 11); - pm_seek(Pico_mcd->TOC.Tracks[0].F, where_seek, SEEK_SET); - pm_read(dest, 2048, Pico_mcd->TOC.Tracks[0].F); - return; - } - - /* hit? */ - offs = lba - prev_lba; - if (offs >= 0 && offs < PicoCDBuffers) - { - hits++; - if (offs == 0) dprintf("CD buffer seek to old %i -> %i\n", prev_lba, lba); - memcpy32(dest, (int *)(cd_buffer + offs*2048), 2048/4); - return; - } - - if (prev_lba + PicoCDBuffers != lba) - { - int where_seek = is_bin ? 
(lba * 2352 + 16) : (lba << 11); - dprintf("CD buffer seek %i -> %i\n", prev_lba, lba); - pm_seek(Pico_mcd->TOC.Tracks[0].F, where_seek, SEEK_SET); - } - - dprintf("CD buffer miss %i -> %i\n", prev_lba, lba); - - if (lba < prev_lba && prev_lba - lba < PicoCDBuffers) - { - read_len = prev_lba - lba; - dprintf("CD buffer move=%i, read_len=%i", PicoCDBuffers - read_len, read_len); - memmove(cd_buffer + read_len*2048, cd_buffer, (PicoCDBuffers - read_len)*2048); - moved = 1; - } - else - { - read_len = PicoCDBuffers; - } - - if (PicoMessage != NULL && read_len >= 512) - { - PicoMessage("Buffering data..."); - } - - if (is_bin) - { - int i = 0; -#ifdef _PSP_FW_VERSION - int bufs = (read_len*2048) / (2048+304); - pm_read(cd_buffer, bufs*(2048+304), Pico_mcd->TOC.Tracks[0].F); - for (i = 1; i < bufs; i++) - // should really use memmove here, but my memcpy32 implementation is also suitable here - memcpy32((int *)(cd_buffer + i*2048), (int *)(cd_buffer + i*(2048+304)), 2048/4); -#endif - for (; i < read_len - 1; i++) - { - pm_read(cd_buffer + i*2048, 2048 + 304, Pico_mcd->TOC.Tracks[0].F); - // pm_seek(Pico_mcd->TOC.Tracks[0].F, 304, SEEK_CUR); // seeking is slower, in PSP case even more - } - // further data might be moved, do not overwrite - pm_read(cd_buffer + i*2048, 2048, Pico_mcd->TOC.Tracks[0].F); - pm_seek(Pico_mcd->TOC.Tracks[0].F, 304, SEEK_CUR); - } - else - { - pm_read(cd_buffer, read_len*2048, Pico_mcd->TOC.Tracks[0].F); - } - memcpy32(dest, (int *) cd_buffer, 2048/4); - prev_lba = lba; - - if (moved) - { - /* file pointer must point to the same data in file, as would-be data after our buffer */ - int where_seek; - lba += PicoCDBuffers; - where_seek = is_bin ? 
(lba * 2352 + 16) : (lba << 11); - pm_seek(Pico_mcd->TOC.Tracks[0].F, where_seek, SEEK_SET); - } -} - diff --git a/pico/cd/cd_file.c b/pico/cd/cd_file.c deleted file mode 100644 index 0f19b71d..00000000 --- a/pico/cd/cd_file.c +++ /dev/null @@ -1,400 +0,0 @@ -/*********************************************************** - * * - * This source was taken from the Gens project * - * Written by Stéphane Dallongeville * - * Copyright (c) 2002 by Stéphane Dallongeville * - * Modified/adapted for PicoDrive by notaz, 2007 * - * * - ***********************************************************/ - -#include "../pico_int.h" -#include "cd_file.h" -#include "cue.h" - -//#define cdprintf(f,...) printf(f "\n",##__VA_ARGS__) // tmp - -static void to_upper(char *d, const char *s) -{ - for (; *s != 0; d++, s++) { - if ('a' <= *s && *s <= 'z') - *d = *s - 'a' + 'A'; - else - *d = *s; - } -} - -static int audio_track_mp3(const char *fname, int index) -{ - _scd_track *Tracks = Pico_mcd->TOC.Tracks; - FILE *tmp_file; - int fs, ret; - - tmp_file = fopen(fname, "rb"); - if (tmp_file == NULL) - return -1; - - ret = fseek(tmp_file, 0, SEEK_END); - fs = ftell(tmp_file); // used to calculate length - fseek(tmp_file, 0, SEEK_SET); - -#ifdef _PSP_FW_VERSION - // some systems (like PSP) can't have many open files at a time, - // so we work with their names instead. 
- fclose(tmp_file); - tmp_file = (void *) strdup(fname); -#endif - Tracks[index].KBtps = (short) mp3_get_bitrate(tmp_file, fs); - Tracks[index].KBtps >>= 3; - if (ret != 0 || Tracks[index].KBtps <= 0) - { - elprintf(EL_STATUS, "track %2i: mp3 bitrate %i", index+1, Tracks[index].KBtps); -#ifdef _PSP_FW_VERSION - free(tmp_file); -#else - fclose(tmp_file); -#endif - return -1; - } - - Tracks[index].F = tmp_file; - - // MP3 File - Tracks[index].ftype = CT_MP3; - fs *= 75; - fs /= Tracks[index].KBtps * 1000; - Tracks[index].Length = fs; - Tracks[index].Offset = 0; - - return 0; -} - -PICO_INTERNAL int Load_CD_Image(const char *cd_img_name, cd_img_type type) -{ - int i, j, num_track, Cur_LBA, index, ret; - int iso_name_len, missed, cd_img_sectors; - _scd_track *Tracks = Pico_mcd->TOC.Tracks; - char tmp_name[256], tmp_ext[10], tmp_ext_u[10]; - cue_data_t *cue_data = NULL; - pm_file *pmf; - static const char *exts[] = { - "%02d.mp3", " %02d.mp3", "-%02d.mp3", "_%02d.mp3", " - %02d.mp3", - "%d.mp3", " %d.mp3", "-%d.mp3", "_%d.mp3", " - %d.mp3", - }; - - if (PicoCDLoadProgressCB != NULL) - PicoCDLoadProgressCB(cd_img_name, 1); - - Unload_ISO(); - - /* is this a .cue? */ - cue_data = cue_parse(cd_img_name); - if (cue_data != NULL) { - cd_img_name = cue_data->tracks[1].fname; - Tracks[0].ftype = cue_data->tracks[1].type; - } - else - Tracks[0].ftype = type == CIT_BIN ? 
CT_BIN : CT_ISO; - - Tracks[0].F = pmf = pm_open(cd_img_name); - if (Tracks[0].F == NULL) - { - Tracks[0].ftype = 0; - Tracks[0].Length = 0; - if (cue_data != NULL) - cue_destroy(cue_data); - return -1; - } - - if (Tracks[0].ftype == CT_ISO) - cd_img_sectors = pmf->size >>= 11; // size in sectors - else cd_img_sectors = pmf->size /= 2352; - Tracks[0].Offset = 0; - - Tracks[0].MSF.M = 0; // minutes - Tracks[0].MSF.S = 2; // seconds - Tracks[0].MSF.F = 0; // frames - - elprintf(EL_STATUS, "Track 1: %02d:%02d:%02d %9i DATA %s", - Tracks[0].MSF.M, Tracks[0].MSF.S, Tracks[0].MSF.F, - Tracks[0].Length, cd_img_name); - - Cur_LBA = Tracks[0].Length = cd_img_sectors; - - if (cue_data != NULL) - { - if (cue_data->tracks[2].fname == NULL) { // NULL means track2 is in same file as track1 - Cur_LBA = Tracks[0].Length = cue_data->tracks[2].sector_offset; - } - i = 100 / cue_data->track_count+1; - for (num_track = 2; num_track <= cue_data->track_count; num_track++) - { - if (PicoCDLoadProgressCB != NULL) - PicoCDLoadProgressCB(cd_img_name, i * num_track); - index = num_track - 1; - Cur_LBA += cue_data->tracks[num_track].pregap; - if (cue_data->tracks[num_track].type == CT_MP3) { - ret = audio_track_mp3(cue_data->tracks[num_track].fname, index); - if (ret != 0) break; - } - else - { - Tracks[index].ftype = cue_data->tracks[num_track].type; - if (cue_data->tracks[num_track].fname != NULL) - { - pm_file *pmfn = pm_open(cue_data->tracks[num_track].fname); - if (pmfn != NULL) - { - // addume raw, ignore header for wav.. 
- Tracks[index].F = pmfn; - Tracks[index].Length = pmfn->size / 2352; - Tracks[index].Offset = cue_data->tracks[num_track].sector_offset; - } - else - { - elprintf(EL_STATUS, "track %2i (%s): can't determine length", - num_track, cue_data->tracks[num_track].fname); - Tracks[index].Length = 2*75; - Tracks[index].Offset = 0; - } - } - else - { - if (num_track < cue_data->track_count) - Tracks[index].Length = cue_data->tracks[num_track+1].sector_offset - - cue_data->tracks[num_track].sector_offset; - else - Tracks[index].Length = cd_img_sectors - cue_data->tracks[num_track].sector_offset; - Tracks[index].Offset = cue_data->tracks[num_track].sector_offset; - } - } - - if (cue_data->tracks[num_track].sector_xlength != 0) - // overriden by custom cue command - Tracks[index].Length = cue_data->tracks[num_track].sector_xlength; - - LBA_to_MSF(Cur_LBA, &Tracks[index].MSF); - Cur_LBA += Tracks[index].Length; - - elprintf(EL_STATUS, "Track %2i: %02d:%02d:%02d %9i AUDIO %s", num_track, Tracks[index].MSF.M, - Tracks[index].MSF.S, Tracks[index].MSF.F, Tracks[index].Length, - cue_data->tracks[num_track].fname); - } - cue_destroy(cue_data); - goto finish; - } - - /* mp3 track autosearch, Gens-like */ - iso_name_len = strlen(cd_img_name); - if (iso_name_len >= sizeof(tmp_name)) - iso_name_len = sizeof(tmp_name) - 1; - - for (num_track = 2, i = 0, missed = 0; i < 100 && missed < 4; i++) - { - if (PicoCDLoadProgressCB != NULL && i > 1) - PicoCDLoadProgressCB(cd_img_name, i + (100-i)*missed/4); - - for (j = 0; j < sizeof(exts)/sizeof(char *); j++) - { - int ext_len; - char *p; - - index = num_track - 1; - - sprintf(tmp_ext, exts[j], i); - ext_len = strlen(tmp_ext); - to_upper(tmp_ext_u, tmp_ext); - - memcpy(tmp_name, cd_img_name, iso_name_len + 1); - p = tmp_name + iso_name_len - 4; - - strcpy(p, tmp_ext); - ret = audio_track_mp3(tmp_name, index); - if (ret != 0) { - strcpy(p, tmp_ext_u); - ret = audio_track_mp3(tmp_name, index); - } - - if (ret != 0 && i > 1 && iso_name_len > 
ext_len) { - p = tmp_name + iso_name_len - ext_len; - strcpy(p, tmp_ext); - ret = audio_track_mp3(tmp_name, index); - if (ret != 0) { - strcpy(p, tmp_ext_u); - ret = audio_track_mp3(tmp_name, index); - } - } - - if (ret == 0) - { - LBA_to_MSF(Cur_LBA, &Tracks[index].MSF); - Cur_LBA += Tracks[index].Length; - - elprintf(EL_STATUS, "Track %2i: %02d:%02d:%02d %9i AUDIO - %s", num_track, Tracks[index].MSF.M, - Tracks[index].MSF.S, Tracks[index].MSF.F, Tracks[index].Length, tmp_name); - - num_track++; - missed = 0; - break; - } - } - if (ret != 0 && i > 1) missed++; - } - -finish: - Pico_mcd->TOC.Last_Track = num_track - 1; - - index = num_track - 1; - - LBA_to_MSF(Cur_LBA, &Tracks[index].MSF); - - elprintf(EL_STATUS, "End CD - %02d:%02d:%02d\n", Tracks[index].MSF.M, - Tracks[index].MSF.S, Tracks[index].MSF.F); - - if (PicoCDLoadProgressCB != NULL) - PicoCDLoadProgressCB(cd_img_name, 100); - - return 0; -} - - -PICO_INTERNAL void Unload_ISO(void) -{ - int i; - - if (Pico_mcd == NULL) return; - - if (Pico_mcd->TOC.Tracks[0].F) pm_close(Pico_mcd->TOC.Tracks[0].F); - - for(i = 1; i < 100; i++) - { - if (Pico_mcd->TOC.Tracks[i].F != NULL) - { - if (Pico_mcd->TOC.Tracks[i].ftype == CT_MP3) -#ifdef _PSP_FW_VERSION - free(Pico_mcd->TOC.Tracks[i].F); -#else - fclose(Pico_mcd->TOC.Tracks[i].F); -#endif - else - pm_close(Pico_mcd->TOC.Tracks[i].F); - } - } - memset(Pico_mcd->TOC.Tracks, 0, sizeof(Pico_mcd->TOC.Tracks)); -} - - -PICO_INTERNAL int FILE_Read_One_LBA_CDC(void) -{ - if (Pico_mcd->s68k_regs[0x36] & 1) // DATA - { - if (Pico_mcd->TOC.Tracks[0].F == NULL) return -1; - - // moved below.. 
- //fseek(Pico_mcd->TOC.Tracks[0].F, where_read, SEEK_SET); - //fread(cp_buf, 1, 2048, Pico_mcd->TOC.Tracks[0].F); - - cdprintf("Read file CDC 1 data sector :\n"); - } - else // AUDIO - { - cdprintf("Read file CDC 1 audio sector :\n"); - } - - // Update CDC stuff - - CDC_Update_Header(); - - if (Pico_mcd->s68k_regs[0x36] & 1) // DATA track - { - if (Pico_mcd->cdc.CTRL.B.B0 & 0x80) // DECEN = decoding enable - { - if (Pico_mcd->cdc.CTRL.B.B0 & 0x04) // WRRQ : this bit enable write to buffer - { - int where_read = 0; - - // CAUTION : lookahead bit not implemented - - if (Pico_mcd->scd.Cur_LBA < 0) - where_read = 0; - else if (Pico_mcd->scd.Cur_LBA >= Pico_mcd->TOC.Tracks[0].Length) - where_read = Pico_mcd->TOC.Tracks[0].Length - 1; - else where_read = Pico_mcd->scd.Cur_LBA; - - Pico_mcd->scd.Cur_LBA++; - - Pico_mcd->cdc.WA.N = (Pico_mcd->cdc.WA.N + 2352) & 0x7FFF; // add one sector to WA - Pico_mcd->cdc.PT.N = (Pico_mcd->cdc.PT.N + 2352) & 0x7FFF; - - *(unsigned int *)(Pico_mcd->cdc.Buffer + Pico_mcd->cdc.PT.N) = Pico_mcd->cdc.HEAD.N; - //memcpy(&Pico_mcd->cdc.Buffer[Pico_mcd->cdc.PT.N + 4], cp_buf, 2048); - - //pm_seek(Pico_mcd->TOC.Tracks[0].F, where_read, SEEK_SET); - //pm_read(Pico_mcd->cdc.Buffer + Pico_mcd->cdc.PT.N + 4, 2048, Pico_mcd->TOC.Tracks[0].F); - PicoCDBufferRead(Pico_mcd->cdc.Buffer + Pico_mcd->cdc.PT.N + 4, where_read); - - cdprintf("Read -> WA = %d Buffer[%d] =", Pico_mcd->cdc.WA.N, Pico_mcd->cdc.PT.N & 0x3FFF); - cdprintf("Header 1 = %.2X %.2X %.2X %.2X", Pico_mcd->cdc.HEAD.B.B0, - Pico_mcd->cdc.HEAD.B.B1, Pico_mcd->cdc.HEAD.B.B2, Pico_mcd->cdc.HEAD.B.B3); - cdprintf("Header 2 = %.2X %.2X %.2X %.2X --- %.2X %.2X\n\n", - Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 0) & 0x3FFF], - Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 1) & 0x3FFF], - Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 2) & 0x3FFF], - Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 3) & 0x3FFF], - Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 4) & 0x3FFF], - 
Pico_mcd->cdc.Buffer[(Pico_mcd->cdc.PT.N + 5) & 0x3FFF]); - } - - } - } - else // music track - { - Pico_mcd->scd.Cur_LBA++; - - Pico_mcd->cdc.WA.N = (Pico_mcd->cdc.WA.N + 2352) & 0x7FFF; // add one sector to WA - Pico_mcd->cdc.PT.N = (Pico_mcd->cdc.PT.N + 2352) & 0x7FFF; - - if (Pico_mcd->cdc.CTRL.B.B0 & 0x80) // DECEN = decoding enable - { - if (Pico_mcd->cdc.CTRL.B.B0 & 0x04) // WRRQ : this bit enable write to buffer - { - // CAUTION : lookahead bit not implemented - - // this is pretty rough, but oh well - not much depends on this anyway - memcpy(&Pico_mcd->cdc.Buffer[Pico_mcd->cdc.PT.N], cdda_out_buffer, 2352); - } - } - } - - if (Pico_mcd->cdc.CTRL.B.B0 & 0x80) // DECEN = decoding enable - { - Pico_mcd->cdc.STAT.B.B0 = 0x80; - - if (Pico_mcd->cdc.CTRL.B.B0 & 0x10) // determine form bit form sub header ? - { - Pico_mcd->cdc.STAT.B.B2 = Pico_mcd->cdc.CTRL.B.B1 & 0x08; - } - else - { - Pico_mcd->cdc.STAT.B.B2 = Pico_mcd->cdc.CTRL.B.B1 & 0x0C; - } - - if (Pico_mcd->cdc.CTRL.B.B0 & 0x02) Pico_mcd->cdc.STAT.B.B3 = 0x20; // ECC done - else Pico_mcd->cdc.STAT.B.B3 = 0x00; // ECC not done - - if (Pico_mcd->cdc.IFCTRL & 0x20) - { - if (Pico_mcd->s68k_regs[0x33] & (1<<5)) - { - elprintf(EL_INTS, "cdc dec irq 5"); - SekInterruptS68k(5); - } - - Pico_mcd->cdc.IFSTAT &= ~0x20; // DEC interrupt happen - Pico_mcd->cdc.Decode_Reg_Read = 0; // Reset read after DEC int - } - } - - - return 0; -} - diff --git a/pico/cd/cd_file.h b/pico/cd/cd_file.h deleted file mode 100644 index f9bb8eac..00000000 --- a/pico/cd/cd_file.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _CD_FILE_H -#define _CD_FILE_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum -{ - CIT_NOT_CD = 0, - CIT_ISO, - CIT_BIN, - CIT_CUE -} -cd_img_type; - - -PICO_INTERNAL int Load_CD_Image(const char *iso_name, cd_img_type type); -PICO_INTERNAL void Unload_ISO(void); -PICO_INTERNAL int FILE_Read_One_LBA_CDC(void); - - -#ifdef __cplusplus -}; -#endif - -#endif diff --git a/pico/cd/cd_image.c b/pico/cd/cd_image.c 
new file mode 100644 index 00000000..9bd29ce0 --- /dev/null +++ b/pico/cd/cd_image.c @@ -0,0 +1,285 @@ +/* + * CD image handler + * (C) notaz, 2007,2013 + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ + +#include "../pico_int.h" +#include "genplus_macros.h" +#include "cdd.h" +#include "cd_parse.h" + +#if defined(__GNUC__) && __GNUC__ >= 7 +#pragma GCC diagnostic ignored "-Wformat-truncation" +#endif + +static int handle_mp3(const char *fname, int index) +{ + track_t *track = &cdd.toc.tracks[index]; + FILE *tmp_file; + int kBps; + int fs, ret; + + tmp_file = fopen(fname, "rb"); + if (tmp_file == NULL) + return -1; + + ret = fseek(tmp_file, 0, SEEK_END); + fs = ftell(tmp_file); + fseek(tmp_file, 0, SEEK_SET); + +#ifdef _PSP_FW_VERSION + // some systems (like PSP) can't have many open files at a time, + // so we work with their names instead. + fclose(tmp_file); + tmp_file = (void *) strdup(fname); +#endif + + kBps = mp3_get_bitrate(tmp_file, fs) / 8; + if (ret != 0 || kBps <= 0) + { + elprintf(EL_STATUS, "track %2i: mp3 bitrate %i", index+1, kBps); +#ifdef _PSP_FW_VERSION + free(tmp_file); +#else + fclose(tmp_file); +#endif + return -1; + } + + track->type = CT_AUDIO; + track->fd = tmp_file; + track->offset = 0; + + fs *= 75; + fs /= kBps * 1000; + return fs; +} + +static void to_upper(char *d, const char *s) +{ + for (; *s != 0; d++, s++) { + if ('a' <= *s && *s <= 'z') + *d = *s - 'a' + 'A'; + else + *d = *s; + } + *d = 0; +} + +// cdd.c uses lba - 150 +static void sprintf_lba(char *buf, size_t size, int lba) +{ + lba += 150; + snprintf(buf, size, "%02d:%02d:%02d", lba / 60 / 75, + (lba / 75) % 60, lba % 75); +} + +int load_cd_image(const char *cd_img_name, int *type) +{ + static const char *exts[] = { + "%02d.mp3", " %02d.mp3", "-%02d.mp3", "_%02d.mp3", " - %02d.mp3", + "%d.mp3", " %d.mp3", "-%d.mp3", "_%d.mp3", " - %d.mp3", + }; + int i, j, n, lba, index, length, ret; + int iso_name_len, missed, 
cd_img_sectors; + char tmp_name[256], tmp_ext[10], tmp_ext_u[10]; + track_t *tracks = cdd.toc.tracks; + cd_data_t *cue_data = NULL; + pm_file *pmf; + + if (PicoCDLoadProgressCB != NULL) + PicoCDLoadProgressCB(cd_img_name, 1); + + Pico_mcd->cdda_type = CT_UNKNOWN; + + /* is this a .cue? */ + cue_data = cue_parse(cd_img_name); + if (cue_data != NULL) { + cd_img_name = cue_data->tracks[1].fname; + *type = cue_data->tracks[1].type; + } else { + cue_data = chd_parse(cd_img_name); + if (cue_data != NULL) + *type = cue_data->tracks[1].type; + } + + pmf = pm_open(cd_img_name); + if (pmf == NULL) + { + if (cue_data != NULL) + cdparse_destroy(cue_data); + return -1; + } + tracks[0].fd = pmf; + tracks[0].fname = strdup(cd_img_name); + tracks[0].type = *type & CT_AUDIO; + + if (*type == CT_ISO) + cd_img_sectors = pmf->size >> 11; // size in sectors + else cd_img_sectors = pmf->size / 2352; + + // cdd.c operates with lba - 150 + tracks[0].start = 0; + tracks[0].end = cd_img_sectors; + tracks[0].offset = 0; + + sprintf_lba(tmp_ext, sizeof(tmp_ext), 0); + elprintf(EL_STATUS, "Track 1: %s %9i %s %s", + tmp_ext, tracks[0].end, tracks[0].type ? "AUDIO" : "DATA ", cd_img_name); + + lba = cd_img_sectors; + + if (cue_data != NULL) + { + if (cue_data->track_count > 1 && cue_data->tracks[2].fname == NULL) { + // NULL fname means track2 is in same file as track1 + lba = tracks[0].end = cue_data->tracks[2].sector_offset; + } + i = 100 / cue_data->track_count + 1; // progress display + + for (n = 2; n <= cue_data->track_count; n++) + { + if (PicoCDLoadProgressCB != NULL) + PicoCDLoadProgressCB(cd_img_name, i * n); + + index = n - 1; + lba += cue_data->tracks[n].pregap; + if (cue_data->tracks[n].type == CT_MP3) { + ret = handle_mp3(cue_data->tracks[n].fname, index); + if (ret < 0) + break; + length = ret; + } + else if (cue_data->tracks[n].fname != NULL) + { + pm_file *f = pm_open(cue_data->tracks[n].fname); + if (f != NULL) + { + // assume raw, ignore header for wav.. 
+ tracks[index].fd = f; + tracks[index].fname = strdup(cue_data->tracks[n].fname); + tracks[index].offset = cue_data->tracks[n].sector_offset; + length = f->size / 2352; + } + else + { + elprintf(EL_STATUS, "track %2i (%s): can't determine length", + n, cue_data->tracks[n].fname); + tracks[index].offset = 0; + length = 2*75; + } + } + else + { + if (n < cue_data->track_count) + length = cue_data->tracks[n+1].sector_offset - + cue_data->tracks[n].sector_offset; + else + length = cd_img_sectors - cue_data->tracks[n].sector_offset; + tracks[index].offset = cue_data->tracks[n].sector_offset; + } + + if (cue_data->tracks[n].sector_xlength != 0) + // overriden by custom cue command + length = cue_data->tracks[n].sector_xlength; + + Pico_mcd->cdda_type = cue_data->tracks[n].type; + tracks[index].type = cue_data->tracks[n].type & CT_AUDIO; + + tracks[index].start = lba; + lba += length; + tracks[index].end = lba; + + sprintf_lba(tmp_ext, sizeof(tmp_ext), tracks[index].start); + elprintf(EL_STATUS, "Track %2i: %s %9i %s %s", n, tmp_ext, length, + tracks[index].type ? "AUDIO" : "DATA ", + cue_data->tracks[n].fname ? 
cue_data->tracks[n].fname : ""); + } + goto finish; + } + + /* mp3 track autosearch, Gens-like */ + iso_name_len = strlen(cd_img_name); + if (iso_name_len >= sizeof(tmp_name)) + iso_name_len = sizeof(tmp_name) - 1; + + for (n = 2, i = 0, missed = 0; i < 100 && missed < 4; i++) + { + if (PicoCDLoadProgressCB != NULL && i > 1) + PicoCDLoadProgressCB(cd_img_name, i + (100-i)*missed/4); + + for (j = 0; j < sizeof(exts)/sizeof(char *); j++) + { + int ext_len; + char *p; + + index = n - 1; + + snprintf(tmp_ext, sizeof(tmp_ext), exts[j], i); + ext_len = strlen(tmp_ext); + to_upper(tmp_ext_u, tmp_ext); + + memcpy(tmp_name, cd_img_name, iso_name_len + 1); + p = tmp_name + iso_name_len - 4; + + strcpy(p, tmp_ext); + ret = handle_mp3(tmp_name, index); + if (ret <= 0) { + strcpy(p, tmp_ext_u); + ret = handle_mp3(tmp_name, index); + } + + if (ret <= 0 && i > 1 && iso_name_len > ext_len) { + p = tmp_name + iso_name_len - ext_len; + strcpy(p, tmp_ext); + ret = handle_mp3(tmp_name, index); + if (ret <= 0) { + strcpy(p, tmp_ext_u); + ret = handle_mp3(tmp_name, index); + } + } + + if (ret > 0) + { + length = ret; + tracks[index].start = lba; + lba += length; + tracks[index].end = lba; + + Pico_mcd->cdda_type = CT_MP3; + tracks[index].type = CT_AUDIO; + + sprintf_lba(tmp_ext, sizeof(tmp_ext), tracks[index].start); + elprintf(EL_STATUS, "Track %2i: %s %9i AUDIO - %s", + n, tmp_ext, length, tmp_name); + + n++; + missed = 0; + break; + } + } + if (ret <= 0 && i > 1) + missed++; + } + +finish: + cdd.toc.last = n - 1; + cdd.toc.end = lba; + tracks[n].start = cdd.toc.end; + + sprintf_lba(tmp_ext, sizeof(tmp_ext), cdd.toc.end); + elprintf(EL_STATUS, "End CD - %s\n", tmp_ext); + + if (PicoCDLoadProgressCB != NULL) + PicoCDLoadProgressCB(cd_img_name, 100); + + if (cue_data != NULL) + cdparse_destroy(cue_data); + + return 0; +} + +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/cd/cue.c b/pico/cd/cd_parse.c similarity index 67% rename from pico/cd/cue.c rename to pico/cd/cd_parse.c index 
a038ccf7..eeb3e880 100644 --- a/pico/cd/cue.c +++ b/pico/cd/cd_parse.c @@ -1,6 +1,7 @@ /* * cuefile handling * (C) notaz, 2008 + * (C) irixxxx, 2020-2023 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -8,11 +9,16 @@ #include #include #include -#include "cue.h" #include "../pico_int.h" +#include "cd_parse.h" // #define elprintf(w,f,...) printf(f "\n",##__VA_ARGS__); +#if defined(USE_LIBCHDR) +#include "libchdr/chd.h" +#include "libchdr/cdrom.h" +#endif + #ifdef _MSC_VER #define snprintf _snprintf #endif @@ -65,20 +71,23 @@ static int get_token(const char *buff, char *dest, int len) static int get_ext(const char *fname, char ext[4], char *base, size_t base_size) { - int len, pos = 0; + size_t pos = 0; + char *p; - len = strlen(fname); - if (len >= 3) - pos = len - 3; + ext[0] = 0; + if (!(p = strrchr(fname, '.'))) + return -1; + pos = p - fname; - strcpy(ext, fname + pos); + strncpy(ext, fname + pos + 1, 4/*sizeof(ext)*/-1); + ext[4/*sizeof(ext)*/-1] = '\0'; - if (base != NULL) { - len = pos; - if (len + 1 < base_size) - len = base_size - 1; - memcpy(base, fname, len); - base[len] = 0; + if (base != NULL && base_size > 0) { + if (pos >= base_size) + pos = base_size - 1; + + memcpy(base, fname, pos); + base[pos] = 0; } return pos; } @@ -105,30 +114,122 @@ static int file_openable(const char *fname) #define BEGINS(buff,str) (strncmp(buff,str,sizeof(str)-1) == 0) /* note: tracks[0] is not used */ -cue_data_t *cue_parse(const char *fname) +cd_data_t *chd_parse(const char *fname) +{ + cd_data_t *data = NULL; +#if defined(USE_LIBCHDR) + cd_data_t *tmp; + int count = 0, count_alloc = 2; + int sectors = 0; + char metadata[256]; + chd_file *cf = NULL; + + if (fname == NULL || *fname == '\0') + return NULL; + + if (chd_open(fname, CHD_OPEN_READ, NULL, &cf) != CHDERR_NONE) + goto out; + + data = calloc(1, sizeof(*data) + count_alloc * sizeof(cd_track_t)); + if (data == NULL) + goto out; + + // get track info + while 
(count < CD_MAX_TRACKS) { + int track = 0, frames = 0, pregap = 0, postgap = 0; + char type[16], subtype[16], pgtype[16], pgsub[16]; + type[0] = subtype[0] = pgtype[0] = pgsub[0] = 0; + + // get metadata for track + if (chd_get_metadata(cf, CDROM_TRACK_METADATA2_TAG, count, + metadata, sizeof(metadata), 0, 0, 0) == CHDERR_NONE) { + if (sscanf(metadata, CDROM_TRACK_METADATA2_FORMAT, + &track, &type[0], &subtype[0], &frames, + &pregap, &pgtype[0], &pgsub[0], &postgap) != 8) + break; + } + else if (chd_get_metadata(cf, CDROM_TRACK_METADATA_TAG, count, + metadata, sizeof(metadata), 0, 0, 0) == CHDERR_NONE) { + if (sscanf(metadata, CDROM_TRACK_METADATA_FORMAT, + &track, &type[0], &subtype[0], &frames) != 4) + break; + } + else break; // all tracks completed + + // metadata sanity check + if (track != count + 1 || frames < 0 || pregap < 0) + break; + + // allocate track structure + count ++; + if (count >= count_alloc) { + count_alloc *= 2; + tmp = realloc(data, sizeof(*data) + count_alloc * sizeof(cd_track_t)); + if (tmp == NULL) { + count--; + break; + } + data = tmp; + } + memset(&data->tracks[count], 0, sizeof(data->tracks[0])); + + if (count == 1) + data->tracks[count].fname = strdup(fname); + if (!strcmp(type, "MODE1_RAW") || !strcmp(type, "MODE2_RAW")) { + data->tracks[count].type = CT_BIN; + } else if (!strcmp(type, "MODE1") || !strcmp(type, "MODE2_FORM1")) { + data->tracks[count].type = CT_ISO; + } else if (!strcmp(type, "AUDIO")) { + data->tracks[count].type = CT_CHD; + } else + break; + + data->tracks[count].pregap = pregap; + if (pgtype[0] != 'V') // VAUDIO includes pregap in file + pregap = 0; + data->tracks[count].sector_offset = sectors + pregap; + data->tracks[count].sector_xlength = frames - pregap; + sectors += (((frames + CD_TRACK_PADDING - 1) / CD_TRACK_PADDING) * CD_TRACK_PADDING); + } + + // check if image id OK, i.e. 
there are tracks, and length <= 80 min + if (count && sectors < (80*60*75)) { + data->track_count = count; + } else { + free(data); + data = NULL; + } + +out: + if (cf) + chd_close(cf); +#endif + return data; +} + +cd_data_t *cue_parse(const char *fname) { char current_file[256], *current_filep, cue_base[256]; char buff[256], buff2[32], ext[4], *p; int ret, count = 0, count_alloc = 2, pending_pregap = 0; size_t current_filep_size, fname_len; - cue_data_t *data = NULL; + cd_data_t *data = NULL, *tmp; FILE *f = NULL; - void *tmp; if (fname == NULL || (fname_len = strlen(fname)) == 0) return NULL; - ret = get_ext(fname, ext, cue_base, sizeof(cue_base)); + ret = get_ext(fname, ext, cue_base, sizeof(cue_base) - 4); if (strcasecmp(ext, "cue") == 0) { f = fopen(fname, "r"); } - else { + else if (strcasecmp(ext, "chd") != 0) { // not a .cue, try one with the same base name - if (ret + 3 < sizeof(cue_base)) { - strcpy(cue_base + ret, "cue"); + if (0 < ret && ret < sizeof(cue_base)) { + strcpy(cue_base + ret, ".cue"); f = fopen(cue_base, "r"); if (f == NULL) { - strcpy(cue_base + ret, "CUE"); + strcpy(cue_base + ret, ".CUE"); f = fopen(cue_base, "r"); } } @@ -147,18 +248,16 @@ cue_data_t *cue_parse(const char *fname) // the basename of cuefile, no path snprintf(cue_base, sizeof(cue_base), "%s", current_filep); - p = cue_base + strlen(cue_base); - if (p - 3 >= cue_base) - p[-3] = 0; + p = strrchr(cue_base, '.'); + if (p) p[1] = '\0'; - data = calloc(1, sizeof(*data) + count_alloc * sizeof(cue_track)); + data = calloc(1, sizeof(*data) + count_alloc * sizeof(cd_track_t)); if (data == NULL) goto out; while (!feof(f)) { - tmp = fgets(buff, sizeof(buff), f); - if (tmp == NULL) + if (fgets(buff, sizeof(buff), f) == NULL) break; mystrip(buff); @@ -175,7 +274,7 @@ cue_data_t *cue_parse(const char *fname) count++; if (count >= count_alloc) { count_alloc *= 2; - tmp = realloc(data, sizeof(*data) + count_alloc * sizeof(cue_track)); + tmp = realloc(data, sizeof(*data) + count_alloc * 
sizeof(cd_track_t)); if (tmp == NULL) { count--; break; @@ -242,16 +341,23 @@ file_ok: data->tracks[count].type = CT_MP3; else if (strcasecmp(ext, "wav") == 0) data->tracks[count].type = CT_WAV; + else if (strcasecmp(ext, "bin") == 0) + data->tracks[count].type = CT_RAW; else { elprintf(EL_STATUS, "unhandled audio format: \"%s\"", data->tracks[count].fname); } } - else + else if (data->tracks[count-1].type & CT_AUDIO) { // propagate previous data->tracks[count].type = data->tracks[count-1].type; } + else + { + // assume raw binary data + data->tracks[count].type = CT_RAW; + } } else { elprintf(EL_STATUS, "unhandled track type: \"%s\"", buff2); @@ -337,7 +443,7 @@ out: } -void cue_destroy(cue_data_t *data) +void cdparse_destroy(cd_data_t *data) { int c; @@ -353,7 +459,7 @@ void cue_destroy(cue_data_t *data) #if 0 int main(int argc, char *argv[]) { - cue_data_t *data = cue_parse(argv[1]); + cd_data_t *data = cue_parse(argv[1]); int c; if (data == NULL) return 1; @@ -363,7 +469,7 @@ int main(int argc, char *argv[]) data->tracks[c].sector_offset / (75*60), data->tracks[c].sector_offset / 75 % 60, data->tracks[c].sector_offset % 75, data->tracks[c].pregap, data->tracks[c].fname); - cue_destroy(data); + cdparse_destroy(data); return 0; } diff --git a/pico/cd/cd_parse.h b/pico/cd/cd_parse.h new file mode 100644 index 00000000..0e1f59e7 --- /dev/null +++ b/pico/cd/cd_parse.h @@ -0,0 +1,5 @@ + +cd_data_t *chd_parse(const char *fname); +cd_data_t *cue_parse(const char *fname); +void cdparse_destroy(cd_data_t *data); + diff --git a/pico/cd/cd_sys.c b/pico/cd/cd_sys.c deleted file mode 100644 index 1c19057f..00000000 --- a/pico/cd/cd_sys.c +++ /dev/null @@ -1,757 +0,0 @@ -/*********************************************************** - * * - * This source file was taken from the Gens project * - * Written by Stéphane Dallongeville * - * Copyright (c) 2002 by Stéphane Dallongeville * - * Modified/adapted for PicoDrive by notaz, 2007 * - * * - 
***********************************************************/ - -#include - -#include "../pico_int.h" -#include "cd_sys.h" -#include "cd_file.h" - -#define DEBUG_CD - -#define TRAY_OPEN 0x0500 // TRAY OPEN CDD status -#define NOCD 0x0000 // CD removed CDD status -#define STOPPED 0x0900 // STOPPED CDD status (happen after stop or close tray command) -#define READY 0x0400 // READY CDD status (also used for seeking) -#define FAST_FOW 0x0300 // FAST FORWARD track CDD status -#define FAST_REV 0x10300 // FAST REVERSE track CDD status -#define PLAYING 0x0100 // PLAYING audio track CDD status - - -static int CD_Present = 0; - - -#define CHECK_TRAY_OPEN \ -if (Pico_mcd->scd.Status_CDD == TRAY_OPEN) \ -{ \ - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD; \ - \ - Pico_mcd->cdd.Minute = 0; \ - Pico_mcd->cdd.Seconde = 0; \ - Pico_mcd->cdd.Frame = 0; \ - Pico_mcd->cdd.Ext = 0; \ - \ - Pico_mcd->scd.CDD_Complete = 1; \ - \ - return 2; \ -} - - -#define CHECK_CD_PRESENT \ -if (!CD_Present) \ -{ \ - Pico_mcd->scd.Status_CDD = NOCD; \ - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD; \ - \ - Pico_mcd->cdd.Minute = 0; \ - Pico_mcd->cdd.Seconde = 0; \ - Pico_mcd->cdd.Frame = 0; \ - Pico_mcd->cdd.Ext = 0; \ - \ - Pico_mcd->scd.CDD_Complete = 1; \ - \ - return 3; \ -} - - -static int MSF_to_LBA(_msf *MSF) -{ - return (MSF->M * 60 * 75) + (MSF->S * 75) + MSF->F - 150; -} - - -PICO_INTERNAL void LBA_to_MSF(int lba, _msf *MSF) -{ - if (lba < -150) lba = 0; - else lba += 150; - MSF->M = lba / (60 * 75); - MSF->S = (lba / 75) % 60; - MSF->F = lba % 75; -} - - -static unsigned int MSF_to_Track(_msf *MSF) -{ - int i, Start, Cur; - - Start = (MSF->M << 16) + (MSF->S << 8) + MSF->F; - - for(i = 1; i <= (Pico_mcd->TOC.Last_Track + 1); i++) - { - Cur = Pico_mcd->TOC.Tracks[i - 1].MSF.M << 16; - Cur += Pico_mcd->TOC.Tracks[i - 1].MSF.S << 8; - Cur += Pico_mcd->TOC.Tracks[i - 1].MSF.F; - - if (Cur > Start) break; - } - - --i; - - if (i > Pico_mcd->TOC.Last_Track) return 100; - else if (i < 1) i = 
1; - - return (unsigned) i; -} - - -static unsigned int LBA_to_Track(int lba) -{ - _msf MSF; - - LBA_to_MSF(lba, &MSF); - return MSF_to_Track(&MSF); -} - - -static void Track_to_MSF(int track, _msf *MSF) -{ - if (track < 1) track = 1; - else if (track > Pico_mcd->TOC.Last_Track) track = Pico_mcd->TOC.Last_Track; - - MSF->M = Pico_mcd->TOC.Tracks[track - 1].MSF.M; - MSF->S = Pico_mcd->TOC.Tracks[track - 1].MSF.S; - MSF->F = Pico_mcd->TOC.Tracks[track - 1].MSF.F; -} - - -PICO_INTERNAL int Track_to_LBA(int track) -{ - _msf MSF; - - Track_to_MSF(track, &MSF); - return MSF_to_LBA(&MSF); -} - - -PICO_INTERNAL void Check_CD_Command(void) -{ - cdprintf("CHECK CD COMMAND"); - - // Check CDC - if (Pico_mcd->scd.Status_CDC & 1) // CDC is reading data ... - { - cdprintf("Got a read command"); - - // DATA ? - if (Pico_mcd->scd.Cur_Track == 1) - Pico_mcd->s68k_regs[0x36] |= 0x01; - else Pico_mcd->s68k_regs[0x36] &= ~0x01; // AUDIO - - if (Pico_mcd->scd.File_Add_Delay == 0) - { - FILE_Read_One_LBA_CDC(); - } - else Pico_mcd->scd.File_Add_Delay--; - } - - // Check CDD - if (Pico_mcd->scd.CDD_Complete) - { - Pico_mcd->scd.CDD_Complete = 0; - - CDD_Export_Status(); - } - - if (Pico_mcd->scd.Status_CDD == FAST_FOW) - { - Pico_mcd->scd.Cur_LBA += 10; - CDC_Update_Header(); - - } - else if (Pico_mcd->scd.Status_CDD == FAST_REV) - { - Pico_mcd->scd.Cur_LBA -= 10; - if (Pico_mcd->scd.Cur_LBA < -150) Pico_mcd->scd.Cur_LBA = -150; - CDC_Update_Header(); - } -} - - -PICO_INTERNAL int Init_CD_Driver(void) -{ - return 0; -} - - -PICO_INTERNAL void End_CD_Driver(void) -{ - Unload_ISO(); -} - - -PICO_INTERNAL void Reset_CD(void) -{ - Pico_mcd->scd.Cur_Track = 0; - Pico_mcd->scd.Cur_LBA = -150; - Pico_mcd->scd.Status_CDC &= ~1; - if (Pico_mcd->scd.Status_CDD != TRAY_OPEN) - Pico_mcd->scd.Status_CDD = CD_Present ? 
READY : NOCD; - Pico_mcd->scd.CDD_Complete = 0; - Pico_mcd->scd.File_Add_Delay = 0; -} - - -int Insert_CD(const char *cdimg_name, int type) -{ - int ret = 1; - - CD_Present = 0; - - if (cdimg_name != NULL && type != CIT_NOT_CD) - { - ret = Load_CD_Image(cdimg_name, type); - if (ret == 0) { - CD_Present = 1; - - if (Pico_mcd->scd.Status_CDD == TRAY_OPEN) - { - if (Pico_mcd->bios[0x122 ^ 1] == '2') - Close_Tray_CDD_cC(); - // else bios will issue it - } - else - { - Pico_mcd->scd.Status_CDD = READY; - } - } - } - - if (Pico_mcd->scd.Status_CDD != TRAY_OPEN && !CD_Present) - Pico_mcd->scd.Status_CDD = NOCD; - - return ret; -} - - -int Stop_CD(void) -{ - int ret = CD_Present; - - Unload_ISO(); - CD_Present = 0; - - return ret; -} - - -/* -PICO_INTERNAL void Change_CD(void) -{ - if (Pico_mcd->scd.Status_CDD == TRAY_OPEN) Close_Tray_CDD_cC(); - else Open_Tray_CDD_cD(); -} -*/ - -PICO_INTERNAL int Get_Status_CDD_c0(void) -{ - cdprintf("Status command : Cur LBA = %d", Pico_mcd->scd.Cur_LBA); - - // Clear immediat status - if ((Pico_mcd->cdd.Status & 0x0F00) == 0x0200) - Pico_mcd->cdd.Status = (Pico_mcd->scd.Status_CDD & 0xFF00) | (Pico_mcd->cdd.Status & 0x00FF); - else if ((Pico_mcd->cdd.Status & 0x0F00) == 0x0700) - Pico_mcd->cdd.Status = (Pico_mcd->scd.Status_CDD & 0xFF00) | (Pico_mcd->cdd.Status & 0x00FF); - else if ((Pico_mcd->cdd.Status & 0x0F00) == 0x0E00) - Pico_mcd->cdd.Status = (Pico_mcd->scd.Status_CDD & 0xFF00) | (Pico_mcd->cdd.Status & 0x00FF); - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Stop_CDD_c1(void) -{ - CHECK_TRAY_OPEN - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - if (CD_Present) Pico_mcd->scd.Status_CDD = STOPPED; - else Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status = 0x0000; - - Pico_mcd->s68k_regs[0x36] |= 0x01; // Data bit set because stopped - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 
0; -} - - -PICO_INTERNAL int Get_Pos_CDD_c20(void) -{ - _msf MSF; - - cdprintf("command 200 : Cur LBA = %d", Pico_mcd->scd.Cur_LBA); - - CHECK_TRAY_OPEN - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) - { - Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - } -// else if (!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - cdprintf("Status CDD = %.4X Status = %.4X", Pico_mcd->scd.Status_CDD, Pico_mcd->cdd.Status); - - LBA_to_MSF(Pico_mcd->scd.Cur_LBA, &MSF); - - Pico_mcd->cdd.Minute = INT_TO_BCDW(MSF.M); - Pico_mcd->cdd.Seconde = INT_TO_BCDW(MSF.S); - Pico_mcd->cdd.Frame = INT_TO_BCDW(MSF.F); - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Get_Track_Pos_CDD_c21(void) -{ - int elapsed_time; - _msf MSF; - - cdprintf("command 201 : Cur LBA = %d", Pico_mcd->scd.Cur_LBA); - - CHECK_TRAY_OPEN - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) - { - Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - } -// else if (!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - elapsed_time = Pico_mcd->scd.Cur_LBA - Track_to_LBA(LBA_to_Track(Pico_mcd->scd.Cur_LBA)); - LBA_to_MSF(elapsed_time - 150, &MSF); - - cdprintf(" elapsed = %d", elapsed_time); - - Pico_mcd->cdd.Minute = INT_TO_BCDW(MSF.M); - Pico_mcd->cdd.Seconde = INT_TO_BCDW(MSF.S); - Pico_mcd->cdd.Frame = INT_TO_BCDW(MSF.F); - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Get_Current_Track_CDD_c22(void) -{ - cdprintf("Status CDD = %.4X Status = %.4X", Pico_mcd->scd.Status_CDD, Pico_mcd->cdd.Status); - - CHECK_TRAY_OPEN - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) - { - Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - } -// else if 
(!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - Pico_mcd->scd.Cur_Track = LBA_to_Track(Pico_mcd->scd.Cur_LBA); - - if (Pico_mcd->scd.Cur_Track == 100) Pico_mcd->cdd.Minute = 0x0A02; - else Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->scd.Cur_Track); - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Get_Total_Lenght_CDD_c23(void) -{ - CHECK_TRAY_OPEN - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) - { - Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - } -// else if (!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->TOC.Tracks[Pico_mcd->TOC.Last_Track].MSF.M); - Pico_mcd->cdd.Seconde = INT_TO_BCDW(Pico_mcd->TOC.Tracks[Pico_mcd->TOC.Last_Track].MSF.S); - Pico_mcd->cdd.Frame = INT_TO_BCDW(Pico_mcd->TOC.Tracks[Pico_mcd->TOC.Last_Track].MSF.F); - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Get_First_Last_Track_CDD_c24(void) -{ - CHECK_TRAY_OPEN - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) - { - Pico_mcd->scd.Status_CDD = NOCD; - } -// else if (!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - Pico_mcd->cdd.Minute = INT_TO_BCDW(1); - Pico_mcd->cdd.Seconde = INT_TO_BCDW(Pico_mcd->TOC.Last_Track); - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Get_Track_Adr_CDD_c25(void) -{ - int track_number; - - CHECK_TRAY_OPEN - - // track number in TC4 & TC5 - - track_number = (Pico_mcd->s68k_regs[0x38+10+4] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+5] & 0xF); - - Pico_mcd->cdd.Status &= 0xFF; - if (!CD_Present) 
- { - Pico_mcd->scd.Status_CDD = NOCD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - } -// else if (!(CDC.CTRL.B.B0 & 0x80)) Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - Pico_mcd->cdd.Status |= Pico_mcd->scd.Status_CDD; - - if (track_number > Pico_mcd->TOC.Last_Track) track_number = Pico_mcd->TOC.Last_Track; - else if (track_number < 1) track_number = 1; - - Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->TOC.Tracks[track_number - 1].MSF.M); - Pico_mcd->cdd.Seconde = INT_TO_BCDW(Pico_mcd->TOC.Tracks[track_number - 1].MSF.S); - Pico_mcd->cdd.Frame = INT_TO_BCDW(Pico_mcd->TOC.Tracks[track_number - 1].MSF.F); - Pico_mcd->cdd.Ext = track_number % 10; - - if (track_number == 1) Pico_mcd->cdd.Frame |= 0x0800; // data track - - Pico_mcd->scd.CDD_Complete = 1; - return 0; -} - - -PICO_INTERNAL int Play_CDD_c3(void) -{ - _msf MSF; - int delay, new_lba; - - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - // MSF of the track to play in TC buffer - - MSF.M = (Pico_mcd->s68k_regs[0x38+10+2] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+3] & 0xF); - MSF.S = (Pico_mcd->s68k_regs[0x38+10+4] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+5] & 0xF); - MSF.F = (Pico_mcd->s68k_regs[0x38+10+6] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+7] & 0xF); - - Pico_mcd->scd.Cur_Track = MSF_to_Track(&MSF); - - new_lba = MSF_to_LBA(&MSF); - delay = new_lba - Pico_mcd->scd.Cur_LBA; - if (delay < 0) delay = -delay; - delay >>= 12; - - if (Pico_mcd->scd.Cur_LBA > 0 && delay < 13) - // based on genplus GX - delay = 13; - - Pico_mcd->scd.Cur_LBA = new_lba; - CDC_Update_Header(); - - cdprintf("Read : Cur LBA = %d, M=%d, S=%d, F=%d", Pico_mcd->scd.Cur_LBA, MSF.M, MSF.S, MSF.F); - - if (Pico_mcd->scd.Status_CDD != PLAYING) delay += 20; - - Pico_mcd->scd.Status_CDD = PLAYING; - Pico_mcd->cdd.Status = 0x0102; -// Pico_mcd->cdd.Status = COMM_OK; - - if (Pico_mcd->scd.File_Add_Delay == 0) Pico_mcd->scd.File_Add_Delay = delay; - - if (Pico_mcd->scd.Cur_Track == 1) - { - Pico_mcd->s68k_regs[0x36] |= 0x01; // DATA - 
} - else - { - Pico_mcd->s68k_regs[0x36] &= ~0x01; // AUDIO - cdda_start_play(); - } - - if (Pico_mcd->scd.Cur_Track == 100) Pico_mcd->cdd.Minute = 0x0A02; - else Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->scd.Cur_Track); - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.Status_CDC |= 1; // Read data with CDC - - Pico_mcd->scd.CDD_Complete = 1; - return 0; -} - - -PICO_INTERNAL int Seek_CDD_c4(void) -{ - _msf MSF; - - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - // MSF to seek in TC buffer - - MSF.M = (Pico_mcd->s68k_regs[0x38+10+2] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+3] & 0xF); - MSF.S = (Pico_mcd->s68k_regs[0x38+10+4] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+5] & 0xF); - MSF.F = (Pico_mcd->s68k_regs[0x38+10+6] & 0xF) * 10 + (Pico_mcd->s68k_regs[0x38+10+7] & 0xF); - - Pico_mcd->scd.Cur_Track = MSF_to_Track(&MSF); - Pico_mcd->scd.Cur_LBA = MSF_to_LBA(&MSF); - CDC_Update_Header(); - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - Pico_mcd->scd.Status_CDD = READY; - Pico_mcd->cdd.Status = 0x0200; - - // DATA ? 
- if (Pico_mcd->scd.Cur_Track == 1) - Pico_mcd->s68k_regs[0x36] |= 0x01; - else Pico_mcd->s68k_regs[0x36] &= ~0x01; // AUDIO - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Pause_CDD_c6(void) -{ - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read to start a new one if raw data - - Pico_mcd->scd.Status_CDD = READY; - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD; - - Pico_mcd->s68k_regs[0x36] |= 0x01; // Data bit set because stopped - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Resume_CDD_c7(void) -{ - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - Pico_mcd->scd.Cur_Track = LBA_to_Track(Pico_mcd->scd.Cur_LBA); - -#ifdef DEBUG_CD - { - _msf MSF; - LBA_to_MSF(Pico_mcd->scd.Cur_LBA, &MSF); - cdprintf("Resume read : Cur LBA = %d, M=%d, S=%d, F=%d", Pico_mcd->scd.Cur_LBA, MSF.M, MSF.S, MSF.F); - } -#endif - - Pico_mcd->scd.Status_CDD = PLAYING; - Pico_mcd->cdd.Status = 0x0102; - - if (Pico_mcd->scd.Cur_Track == 1) - { - Pico_mcd->s68k_regs[0x36] |= 0x01; // DATA - } - else - { - Pico_mcd->s68k_regs[0x36] &= ~0x01; // AUDIO - cdda_start_play(); - } - - if (Pico_mcd->scd.Cur_Track == 100) Pico_mcd->cdd.Minute = 0x0A02; - else Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->scd.Cur_Track); - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.Status_CDC |= 1; // Read data with CDC - - Pico_mcd->scd.CDD_Complete = 1; - return 0; -} - - -PICO_INTERNAL int Fast_Foward_CDD_c8(void) -{ - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - Pico_mcd->scd.Status_CDD = FAST_FOW; - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD | 2; - - Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->scd.Cur_Track); 
- Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Fast_Rewind_CDD_c9(void) -{ - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - Pico_mcd->scd.Status_CDD = FAST_REV; - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD | 2; - - Pico_mcd->cdd.Minute = INT_TO_BCDW(Pico_mcd->scd.Cur_Track); - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Close_Tray_CDD_cC(void) -{ - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - elprintf(EL_STATUS, "tray close\n"); - - if (PicoMCDcloseTray != NULL) - PicoMCDcloseTray(); - - Pico_mcd->scd.Status_CDD = CD_Present ? STOPPED : NOCD; - Pico_mcd->cdd.Status = 0x0000; - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int Open_Tray_CDD_cD(void) -{ - CHECK_TRAY_OPEN - - Pico_mcd->scd.Status_CDC &= ~1; // Stop CDC read - - elprintf(EL_STATUS, "tray open\n"); - - Unload_ISO(); - CD_Present = 0; - - if (PicoMCDopenTray != NULL) - PicoMCDopenTray(); - - Pico_mcd->scd.Status_CDD = TRAY_OPEN; - Pico_mcd->cdd.Status = 0x0E00; - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int CDD_cA(void) -{ - CHECK_TRAY_OPEN - CHECK_CD_PRESENT - - Pico_mcd->scd.Status_CDC &= ~1; - - Pico_mcd->scd.Status_CDD = READY; - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD; - - Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = INT_TO_BCDW(1); - Pico_mcd->cdd.Frame = INT_TO_BCDW(1); - Pico_mcd->cdd.Ext = 0; - - Pico_mcd->scd.CDD_Complete = 1; - - return 0; -} - - -PICO_INTERNAL int CDD_Def(void) -{ - Pico_mcd->cdd.Status = Pico_mcd->scd.Status_CDD; - - 
Pico_mcd->cdd.Minute = 0; - Pico_mcd->cdd.Seconde = 0; - Pico_mcd->cdd.Frame = 0; - Pico_mcd->cdd.Ext = 0; - - return 0; -} - - diff --git a/pico/cd/cd_sys.h b/pico/cd/cd_sys.h deleted file mode 100644 index 6291c2cd..00000000 --- a/pico/cd/cd_sys.h +++ /dev/null @@ -1,109 +0,0 @@ -/*********************************************************** - * * - * This source was taken from the Gens project * - * Written by Stéphane Dallongeville * - * Copyright (c) 2002 by Stéphane Dallongeville * - * Modified/adapted for PicoDrive by notaz, 2007 * - * * - ***********************************************************/ - -#ifndef _CD_SYS_H -#define _CD_SYS_H - -#include "cd_file.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -#define INT_TO_BCDB(c) \ -((c) > 99)?(0x99):((((c) / 10) << 4) + ((c) % 10)); - -#define INT_TO_BCDW(c) \ -((c) > 99)?(0x0909):((((c) / 10) << 8) + ((c) % 10)); - -#define BCDB_TO_INT(c) \ -(((c) >> 4) * 10) + ((c) & 0xF); - -#define BCDW_TO_INT(c) \ -(((c) >> 8) * 10) + ((c) & 0xF); - - -typedef struct -{ - unsigned char M; - unsigned char S; - unsigned char F; -} _msf; - -typedef struct -{ - _msf MSF; - // - char ftype; // cue_track_type - void *F; - int Length; - int Offset; // sector offset, when single file is used for multiple virtual tracks - short KBtps; // kbytes per sec for mp3s (bitrate / 1000 / 8) - short pad; -} _scd_track; - -typedef struct -{ -// unsigned char First_Track; // always 1 - _scd_track Tracks[100]; - unsigned int Last_Track; -} _scd_toc; - -typedef struct { - unsigned int Status_CDD; - unsigned int Status_CDC; - int Cur_LBA; - unsigned int Cur_Track; - int File_Add_Delay; - char CDD_Complete; - int pad[6]; -} _scd; - - -PICO_INTERNAL void LBA_to_MSF(int lba, _msf *MSF); -PICO_INTERNAL int Track_to_LBA(int track); - -// moved to pico.h -// int Insert_CD(char *iso_name, int is_bin); -// void Stop_CD(void); - -PICO_INTERNAL void Check_CD_Command(void); - -PICO_INTERNAL int Init_CD_Driver(void); -PICO_INTERNAL void 
End_CD_Driver(void); -PICO_INTERNAL void Reset_CD(void); - -PICO_INTERNAL int Get_Status_CDD_c0(void); -PICO_INTERNAL int Stop_CDD_c1(void); -PICO_INTERNAL int Get_Pos_CDD_c20(void); -PICO_INTERNAL int Get_Track_Pos_CDD_c21(void); -PICO_INTERNAL int Get_Current_Track_CDD_c22(void); -PICO_INTERNAL int Get_Total_Lenght_CDD_c23(void); -PICO_INTERNAL int Get_First_Last_Track_CDD_c24(void); -PICO_INTERNAL int Get_Track_Adr_CDD_c25(void); -PICO_INTERNAL int Play_CDD_c3(void); -PICO_INTERNAL int Seek_CDD_c4(void); -PICO_INTERNAL int Pause_CDD_c6(void); -PICO_INTERNAL int Resume_CDD_c7(void); -PICO_INTERNAL int Fast_Foward_CDD_c8(void); -PICO_INTERNAL int Fast_Rewind_CDD_c9(void); -PICO_INTERNAL int CDD_cA(void); -PICO_INTERNAL int Close_Tray_CDD_cC(void); -PICO_INTERNAL int Open_Tray_CDD_cD(void); - -PICO_INTERNAL int CDD_Def(void); - - -#ifdef __cplusplus -}; -#endif - -#endif - diff --git a/pico/cd/cdc.c b/pico/cd/cdc.c new file mode 100644 index 00000000..1163cb28 --- /dev/null +++ b/pico/cd/cdc.c @@ -0,0 +1,870 @@ +/*************************************************************************************** + * Genesis Plus + * CD data controller (LC89510 compatible) + * + * Copyright (C) 2012 Eke-Eke (Genesis Plus GX) + * + * Redistribution and use of this code or any derivative works are permitted + * provided that the following conditions are met: + * + * - Redistributions may not be sold, nor may they be used in a commercial + * product or activity. + * + * - Redistributions that are modified from the original source must include the + * complete source code, including the source code for all components used by a + * binary built from the modified sources. 
However, as a special exception, the + * source code distributed need not include anything that is normally distributed + * (in either source or binary form) with the major components (compiler, kernel, + * and so on) of the operating system on which the executable runs, unless that + * component itself accompanies the executable. + * + * - Redistributions must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ * + ****************************************************************************************/ + +#include "../pico_int.h" +#include "genplus_macros.h" + +/* IFSTAT register bitmasks */ +#define BIT_DTEI 0x40 +#define BIT_DECI 0x20 +#define BIT_DTBSY 0x08 +#define BIT_DTEN 0x02 + +/* IFCTRL register bitmasks */ +#define BIT_DTEIEN 0x40 +#define BIT_DECIEN 0x20 +#define BIT_DOUTEN 0x02 + +/* CTRL0 register bitmasks */ +#define BIT_DECEN 0x80 +#define BIT_E01RQ 0x20 +#define BIT_AUTORQ 0x10 +#define BIT_WRRQ 0x04 + +/* CTRL1 register bitmasks */ +#define BIT_MODRQ 0x08 +#define BIT_FORMRQ 0x04 +#define BIT_SHDREN 0x01 + +/* CTRL2 register bitmask */ +#define BIT_VALST 0x80 + +/* PicoDrive: doing DMA at once, not using callbacks */ +//#define DMA_BYTES_PER_LINE 512 +#define DMA_CYCLES_PER_BYTE 4 // or 6? + +enum dma_type { + word_ram_0_dma_w = 1, + word_ram_1_dma_w = 2, + word_ram_2M_dma_w = 3, + pcm_ram_dma_w = 4, + prg_ram_dma_w = 5, +}; + +/* CDC hardware */ +typedef struct +{ + uint8 ifstat; + uint8 ifctrl; + uint16 dbc; + uint16 dac; + uint16 pt; + uint16 wa; + uint8 ctrl[2]; + uint8 head[2][4]; + uint8 stat[4]; + int cycles; + //void (*dma_w)(unsigned int words); + int dma_w; + uint8 ram[0x4000 + 2352]; /* 16K external RAM (with one block overhead to handle buffer overrun) */ +} cdc_t; + +static cdc_t cdc; + +void cdc_init(void) +{ + memset(&cdc, 0, sizeof(cdc_t)); +} + +void cdc_reset(void) +{ + /* reset CDC register index */ + Pico_mcd->s68k_regs[0x04+1] = 0x00; + + /* reset CDC registers */ + cdc.ifstat = 0xff; + cdc.ifctrl = 0x00; + cdc.ctrl[0] = 0x00; + cdc.ctrl[1] = 0x00; + cdc.stat[0] = 0x00; + cdc.stat[1] = 0x00; + cdc.stat[2] = 0x00; + cdc.stat[3] = 0x80; + cdc.head[0][0] = 0x00; + cdc.head[0][1] = 0x00; + cdc.head[0][2] = 0x00; + cdc.head[0][3] = 0x01; + cdc.head[1][0] = 0x00; + cdc.head[1][1] = 0x00; + cdc.head[1][2] = 0x00; + cdc.head[1][3] = 0x00; + + /* reset CDC cycle counter */ + cdc.cycles = 0; + + /* DMA transfer disabled */ + cdc.dma_w = 0; 
+} + +int cdc_context_save(uint8 *state) +{ + uint8 tmp8; + int bufferptr = 0; + + if (cdc.dma_w == pcm_ram_dma_w) + { + tmp8 = 1; + } + else if (cdc.dma_w == prg_ram_dma_w) + { + tmp8 = 2; + } + else if (cdc.dma_w == word_ram_0_dma_w) + { + tmp8 = 3; + } + else if (cdc.dma_w == word_ram_1_dma_w) + { + tmp8 = 4; + } + else if (cdc.dma_w == word_ram_2M_dma_w) + { + tmp8 = 5; + } + else + { + tmp8 = 0; + } + + save_param(&cdc, sizeof(cdc)); + save_param(&tmp8, 1); + + return bufferptr; +} + +int cdc_context_load(uint8 *state) +{ + uint8 tmp8; + int bufferptr = 0; + + load_param(&cdc, sizeof(cdc)); + load_param(&tmp8, 1); + + switch (tmp8) + { + case 1: + cdc.dma_w = pcm_ram_dma_w; + break; + case 2: + cdc.dma_w = prg_ram_dma_w; + break; + case 3: + cdc.dma_w = word_ram_0_dma_w; + break; + case 4: + cdc.dma_w = word_ram_1_dma_w; + break; + case 5: + cdc.dma_w = word_ram_2M_dma_w; + break; + default: + cdc.dma_w = 0; + break; + } + + return bufferptr; +} + +int cdc_context_load_old(uint8 *state) +{ +#define old_load(v, ofs) \ + memcpy(&cdc.v, state + ofs, sizeof(cdc.v)) + + memcpy(cdc.ram, state, 0x4000); + old_load(ifstat, 67892); + old_load(ifctrl, 67924); + old_load(dbc, 67896); + old_load(dac, 67900); + old_load(pt, 67908); + old_load(wa, 67912); + old_load(ctrl, 67928); + old_load(head[0], 67904); + old_load(stat, 67916); + + cdc.dma_w = 0; + switch (Pico_mcd->s68k_regs[0x04+0] & 0x07) + { + case 4: /* PCM RAM DMA */ + cdc.dma_w = pcm_ram_dma_w; + break; + case 5: /* PRG-RAM DMA */ + cdc.dma_w = prg_ram_dma_w; + break; + case 7: /* WORD-RAM DMA */ + if (Pico_mcd->s68k_regs[0x02+1] & 0x04) + { + if (Pico_mcd->s68k_regs[0x02+1] & 0x01) + cdc.dma_w = word_ram_0_dma_w; + else + cdc.dma_w = word_ram_1_dma_w; + } + else + { + if (Pico_mcd->s68k_regs[0x02+1] & 0x02) + cdc.dma_w = word_ram_2M_dma_w; + } + break; + } + + return 0x10960; // sizeof(old_cdc) +#undef old_load +} + +static void do_dma(enum dma_type type, int bytes_in) +{ + int dma_addr = 
(Pico_mcd->s68k_regs[0x0a] << 8) | Pico_mcd->s68k_regs[0x0b]; + int src_addr = cdc.dac & 0x3ffe; + int dst_addr = dma_addr; + int bytes = bytes_in; + int words = bytes_in >> 1; + int dst_limit = 0; + uint8 *dst; + int len; + + elprintf(EL_CD, "dma %d %04x->%04x %x", + type, cdc.dac, dst_addr, bytes_in); + + switch (type) + { + case pcm_ram_dma_w: + dst_addr = (dst_addr << 2) & 0xffc; + if (dst_addr + bytes > 0x1000) { + elprintf(EL_ANOMALY, "pcm dma oflow: %x %x", dst_addr, words); + bytes = 0x1000 - dst_addr; + } + dst = Pico_mcd->pcm_ram_b[Pico_mcd->pcm.bank]; + dst = dst + dst_addr; + while (bytes > 0) + { + if (src_addr + bytes > 0x4000) { + len = 0x4000 - src_addr; + memcpy(dst, cdc.ram + src_addr, len); + dst += len; + src_addr = 0; + bytes -= len; + continue; + } + memcpy(dst, cdc.ram + src_addr, bytes); + break; + } + goto update_dma; + + case prg_ram_dma_w: + dst_addr <<= 3; + dst = Pico_mcd->prg_ram + dst_addr; + dst_limit = 0x80000; + break; + + case word_ram_0_dma_w: + dst_addr = (dst_addr << 3) & 0x1fffe; + dst = Pico_mcd->word_ram1M[0] + dst_addr; + dst_limit = 0x20000; + break; + + case word_ram_1_dma_w: + dst_addr = (dst_addr << 3) & 0x1fffe; + dst = Pico_mcd->word_ram1M[1] + dst_addr; + dst_limit = 0x20000; + break; + + case word_ram_2M_dma_w: + dst_addr = (dst_addr << 3) & 0x3fffe; + dst = Pico_mcd->word_ram2M + dst_addr; + dst_limit = 0x40000; + break; + + default: + elprintf(EL_ANOMALY, "invalid dma: %d", type); + goto update_dma; + } + + if (dst_addr + words * 2 > dst_limit) { + elprintf(EL_ANOMALY, "cd dma %d oflow: %x %x", type, dst_addr, words); + words = (dst_limit - dst_addr) / 2; + } + while (words > 0) + { + if (src_addr + words * 2 > 0x4000) { + len = 0x4000 - src_addr; + memcpy16bswap((void *)dst, cdc.ram + src_addr, len / 2); + dst += len; + src_addr = 0; + words -= len / 2; + continue; + } + memcpy16bswap((void *)dst, cdc.ram + src_addr, words); + break; + } + + bytes_in &= ~1; // Todo leftover byte? 
+ +update_dma: + /* update DMA addresses */ + cdc.dac += bytes_in; + if (type == pcm_ram_dma_w) + dma_addr += bytes_in >> 2; + else + dma_addr += bytes_in >> 3; + + Pico_mcd->s68k_regs[0x0a] = dma_addr >> 8; + Pico_mcd->s68k_regs[0x0b] = dma_addr; +} + +void cdc_dma_update(void) +{ + /* end of DMA transfer ? */ + //if (cdc.dbc < DMA_BYTES_PER_LINE) + { + /* transfer remaining words using 16-bit DMA */ + //cdc.dma_w((cdc.dbc + 1) >> 1); + do_dma(cdc.dma_w, cdc.dbc + 1); + + /* reset data byte counter (DBCH bits 4-7 should be set to 1) */ + cdc.dbc = 0xffff; + + /* clear !DTEN and !DTBSY */ + cdc.ifstat |= (BIT_DTBSY | BIT_DTEN); + + /* clear DSR bit & set EDT bit (SCD register $04) */ + Pico_mcd->s68k_regs[0x04+0] = (Pico_mcd->s68k_regs[0x04+0] & 0x07) | 0x80; + + if (cdc.ifstat & BIT_DTEI) { + /* pending Data Transfer End interrupt */ + cdc.ifstat &= ~BIT_DTEI; + + /* Data Transfer End interrupt enabled ? */ + if (cdc.ifctrl & BIT_DTEIEN) + { + /* level 5 interrupt enabled ? */ + if (Pico_mcd->s68k_regs[0x32+1] & PCDS_IEN5) + { + /* update IRQ level */ + elprintf(EL_INTS, "cdc DTE irq 5"); + pcd_irq_s68k(5, 1); + } + } + } + + /* disable DMA transfer */ + cdc.dma_w = 0; + } +#if 0 + else + { + /* transfer all words using 16-bit DMA */ + cdc.dma_w(DMA_BYTES_PER_LINE >> 1); + + /* decrement data byte counter */ + cdc.dbc -= length; + } +#endif +} + +int cdc_decoder_update(uint8 header[4]) +{ + /* data decoding enabled ? */ + if (cdc.ctrl[0] & BIT_DECEN) + { + /* update HEAD registers */ + memcpy(cdc.head[0], header, sizeof(cdc.head[0])); + + /* set !VALST */ + cdc.stat[3] = 0x00; + + /* set CRCOK bit */ + cdc.stat[0] = BIT_DECEN; + + /* pending decoder interrupt */ + cdc.ifstat &= ~BIT_DECI; + + /* decoder interrupt enabled ? */ + if (cdc.ifctrl & BIT_DECIEN) + { + /* level 5 interrupt enabled ? 
*/ + if (Pico_mcd->s68k_regs[0x32+1] & PCDS_IEN5) + { + /* update IRQ level */ + elprintf(EL_INTS, "cdc DEC irq 5"); + pcd_irq_s68k(5, 1); + } + } + + /* buffer RAM write enabled ? */ + if (cdc.ctrl[0] & BIT_WRRQ) + { + uint16 offset; + + /* increment block pointer */ + cdc.pt += 2352; + + /* increment write address */ + cdc.wa += 2352; + + /* CDC buffer address */ + offset = cdc.pt & 0x3fff; + + /* write CDD block header (4 bytes) */ + memcpy(cdc.ram + offset, header, 4); + + /* write CDD block data (2048 bytes) */ + cdd_read_data(cdc.ram + 4 + offset); + + /* take care of buffer overrun */ + if (offset > (0x4000 - 2048 - 4)) + { + /* data should be written at the start of buffer */ + memcpy(cdc.ram, cdc.ram + 0x4000, offset + 2048 + 4 - 0x4000); + } + + /* read next data block */ + return 1; + } + } + + /* keep decoding same data block if Buffer Write is disabled */ + return 0; +} + +void cdc_reg_w(unsigned char data) +{ +#ifdef LOG_CDC + elprintf(EL_STATUS, "CDC register %X write 0x%04x", Pico_mcd->s68k_regs[0x04+1] & 0x0F, data); +#endif + switch (Pico_mcd->s68k_regs[0x04+1] & 0x1F) + { + case 0x00: + break; + + case 0x01: /* IFCTRL */ + { + /* pending interrupts ? */ + if (((data & BIT_DTEIEN) && !(cdc.ifstat & BIT_DTEI)) || + ((data & BIT_DECIEN) && !(cdc.ifstat & BIT_DECI))) + { + /* level 5 interrupt enabled ? 
*/ + if (Pico_mcd->s68k_regs[0x32+1] & PCDS_IEN5) + { + /* update IRQ level */ + elprintf(EL_INTS, "cdc pending irq 5"); + pcd_irq_s68k(5, 1); + } + } + else // if (scd.pending & (1 << 5)) + { + /* clear pending level 5 interrupts */ + pcd_irq_s68k(5, 0); + } + + /* abort any data transfer if data output is disabled */ + if (!(data & BIT_DOUTEN)) + { + /* clear !DTBSY and !DTEN */ + cdc.ifstat |= (BIT_DTBSY | BIT_DTEN); + } + + cdc.ifctrl = data; + Pico_mcd->s68k_regs[0x04+1] = 0x02; + break; + } + + case 0x02: /* DBCL */ + cdc.dbc &= 0xff00; + cdc.dbc |= data; + Pico_mcd->s68k_regs[0x04+1] = 0x03; + break; + + case 0x03: /* DBCH */ + cdc.dbc &= 0x00ff; + cdc.dbc |= (data & 0x0f) << 8; + Pico_mcd->s68k_regs[0x04+1] = 0x04; + break; + + case 0x04: /* DACL */ + cdc.dac &= 0xff00; + cdc.dac |= data; + Pico_mcd->s68k_regs[0x04+1] = 0x05; + break; + + case 0x05: /* DACH */ + cdc.dac &= 0x00ff; + cdc.dac |= data << 8; + Pico_mcd->s68k_regs[0x04+1] = 0x06; + break; + + case 0x06: /* DTRG */ + { + /* start data transfer if data output is enabled */ + if (cdc.ifctrl & BIT_DOUTEN) + { + /* set !DTBSY */ + cdc.ifstat &= ~BIT_DTBSY; + + /* clear DBCH bits 4-7 */ + cdc.dbc &= 0x0fff; + + /* clear EDT & DSR bits (SCD register $04) */ + Pico_mcd->s68k_regs[0x04+0] &= 0x07; + + cdc.dma_w = 0; + + /* setup data transfer destination */ + switch (Pico_mcd->s68k_regs[0x04+0] & 0x07) + { + case 2: /* MAIN-CPU host read */ + case 3: /* SUB-CPU host read */ + { + /* set !DTEN */ + cdc.ifstat &= ~BIT_DTEN; + + /* set DSR bit (register $04) */ + Pico_mcd->s68k_regs[0x04+0] |= 0x40; + break; + } + + case 4: /* PCM RAM DMA */ + { + cdc.dma_w = pcm_ram_dma_w; + break; + } + + case 5: /* PRG-RAM DMA */ + { + cdc.dma_w = prg_ram_dma_w; + break; + } + + case 7: /* WORD-RAM DMA */ + { + /* check memory mode */ + if (Pico_mcd->s68k_regs[0x02+1] & 0x04) + { + /* 1M mode */ + if (Pico_mcd->s68k_regs[0x02+1] & 0x01) + { + /* Word-RAM bank 0 is assigned to SUB-CPU */ + cdc.dma_w = word_ram_0_dma_w; + 
} + else + { + /* Word-RAM bank 1 is assigned to SUB-CPU */ + cdc.dma_w = word_ram_1_dma_w; + } + } + else + { + /* 2M mode */ + if (Pico_mcd->s68k_regs[0x02+1] & 0x02) + { + /* only process DMA if Word-RAM is assigned to SUB-CPU */ + cdc.dma_w = word_ram_2M_dma_w; + } + } + break; + } + + default: /* invalid */ + { + elprintf(EL_ANOMALY, "invalid CDC tranfer destination (%d)", + Pico_mcd->s68k_regs[0x04+0] & 0x07); + break; + } + } + + if (cdc.dma_w) + pcd_event_schedule_s68k(PCD_EVENT_DMA, cdc.dbc * DMA_CYCLES_PER_BYTE); + } + + Pico_mcd->s68k_regs[0x04+1] = 0x07; + break; + } + + case 0x07: /* DTACK */ + { + /* clear pending data transfer end interrupt */ + cdc.ifstat |= BIT_DTEI; + + /* clear DBCH bits 4-7 */ + cdc.dbc &= 0x0fff; + +#if 0 + /* no pending decoder interrupt ? */ + if ((cdc.ifstat | BIT_DECI) || !(cdc.ifctrl & BIT_DECIEN)) + { + /* clear pending level 5 interrupt */ + pcd_irq_s68k(5, 0); + } +#endif + Pico_mcd->s68k_regs[0x04+1] = 0x08; + break; + } + + case 0x08: /* WAL */ + cdc.wa &= 0xff00; + cdc.wa |= data; + Pico_mcd->s68k_regs[0x04+1] = 0x09; + break; + + case 0x09: /* WAH */ + cdc.wa &= 0x00ff; + cdc.wa |= data << 8; + Pico_mcd->s68k_regs[0x04+1] = 0x0a; + break; + + case 0x0a: /* CTRL0 */ + { + /* reset DECI if decoder turned off */ + if (!(data & BIT_DECEN)) + cdc.ifstat |= BIT_DECI; + + /* update decoding mode */ + if (data & BIT_AUTORQ) + { + /* set MODE bit according to CTRL1 register & clear FORM bit */ + cdc.stat[2] = cdc.ctrl[1] & BIT_MODRQ; + } + else + { + /* set MODE & FORM bits according to CTRL1 register */ + cdc.stat[2] = cdc.ctrl[1] & (BIT_MODRQ | BIT_FORMRQ); + } + + cdc.ctrl[0] = data; + Pico_mcd->s68k_regs[0x04+1] = 0x0b; + break; + } + + case 0x0b: /* CTRL1 */ + { + /* update decoding mode */ + if (cdc.ctrl[0] & BIT_AUTORQ) + { + /* set MODE bit according to CTRL1 register & clear FORM bit */ + cdc.stat[2] = data & BIT_MODRQ; + } + else + { + /* set MODE & FORM bits according to CTRL1 register */ + cdc.stat[2] = data & 
(BIT_MODRQ | BIT_FORMRQ); + } + + cdc.ctrl[1] = data; + Pico_mcd->s68k_regs[0x04+1] = 0x0c; + break; + } + + case 0x0c: /* PTL */ + cdc.pt &= 0xff00; + cdc.pt |= data; + Pico_mcd->s68k_regs[0x04+1] = 0x0d; + break; + + case 0x0d: /* PTH */ + cdc.pt &= 0x00ff; + cdc.pt |= data << 8; + Pico_mcd->s68k_regs[0x04+1] = 0x0e; + break; + + case 0x0e: /* CTRL2 (unused) */ + Pico_mcd->s68k_regs[0x04+1] = 0x0f; + break; + + case 0x0f: /* RESET */ + cdc_reset(); + break; + + default: /* by default, SBOUT is not used */ + Pico_mcd->s68k_regs[0x04+1] = (Pico_mcd->s68k_regs[0x04+1] + 1) & 0x1f; + break; + } +} + +unsigned char cdc_reg_r(void) +{ + switch (Pico_mcd->s68k_regs[0x04+1] & 0x1F) + { + case 0x00: + return 0xff; + + case 0x01: /* IFSTAT */ + Pico_mcd->s68k_regs[0x04+1] = 0x02; + return cdc.ifstat; + + case 0x02: /* DBCL */ + Pico_mcd->s68k_regs[0x04+1] = 0x03; + return cdc.dbc & 0xff; + + case 0x03: /* DBCH */ + Pico_mcd->s68k_regs[0x04+1] = 0x04; + return (cdc.dbc >> 8) & 0xff; + + case 0x04: /* HEAD0 */ + Pico_mcd->s68k_regs[0x04+1] = 0x05; + return cdc.head[cdc.ctrl[1] & BIT_SHDREN][0]; + + case 0x05: /* HEAD1 */ + Pico_mcd->s68k_regs[0x04+1] = 0x06; + return cdc.head[cdc.ctrl[1] & BIT_SHDREN][1]; + + case 0x06: /* HEAD2 */ + Pico_mcd->s68k_regs[0x04+1] = 0x07; + return cdc.head[cdc.ctrl[1] & BIT_SHDREN][2]; + + case 0x07: /* HEAD3 */ + Pico_mcd->s68k_regs[0x04+1] = 0x08; + return cdc.head[cdc.ctrl[1] & BIT_SHDREN][3]; + + case 0x08: /* PTL */ + Pico_mcd->s68k_regs[0x04+1] = 0x09; + return cdc.pt & 0xff; + + case 0x09: /* PTH */ + Pico_mcd->s68k_regs[0x04+1] = 0x0a; + return (cdc.pt >> 8) & 0xff; + + case 0x0a: /* WAL */ + Pico_mcd->s68k_regs[0x04+1] = 0x0b; + return cdc.wa & 0xff; + + case 0x0b: /* WAH */ + Pico_mcd->s68k_regs[0x04+1] = 0x0c; + return (cdc.wa >> 8) & 0xff; + + case 0x0c: /* STAT0 */ + Pico_mcd->s68k_regs[0x04+1] = 0x0d; + return cdc.stat[0]; + + case 0x0d: /* STAT1 (always return 0) */ + Pico_mcd->s68k_regs[0x04+1] = 0x0e; + return 0x00; + + case 
0x0e: /* STAT2 */ + Pico_mcd->s68k_regs[0x04+1] = 0x0f; + return cdc.stat[2]; + + case 0x0f: /* STAT3 */ + { + uint8 data = cdc.stat[3]; + + /* clear !VALST (note: this is not 100% correct but BIOS do not seem to care) */ + cdc.stat[3] = BIT_VALST; + + /* clear pending decoder interrupt */ + cdc.ifstat |= BIT_DECI; + +#if 0 + /* no pending data transfer end interrupt */ + if ((cdc.ifstat | BIT_DTEI) || !(cdc.ifctrl & BIT_DTEIEN)) + { + /* clear pending level 5 interrupt */ + pcd_irq_s68k(5, 0); + } +#endif + + Pico_mcd->s68k_regs[0x04+1] = 0x10; + return data; + } + + default: /* by default, COMIN is always empty */ + Pico_mcd->s68k_regs[0x04+1] = (Pico_mcd->s68k_regs[0x04+1] + 1) & 0x1f; + return 0xff; + } +} + +unsigned short cdc_host_r(void) +{ + /* check if data is available */ + if (!(cdc.ifstat & BIT_DTEN)) + { + /* read data word from CDC RAM buffer */ + uint8 *datap = cdc.ram + (cdc.dac & 0x3ffe); + uint16 data = (datap[0] << 8) | datap[1]; + +#ifdef LOG_CDC + error("CDC host read 0x%04x -> 0x%04x (dbc=0x%x) (%X)\n", cdc.dac, data, cdc.dbc, s68k.pc); +#endif + + /* increment data address counter */ + cdc.dac += 2; + + /* decrement data byte counter */ + cdc.dbc -= 2; + + /* end of transfer ? */ + if ((int16)cdc.dbc <= 0) + { + /* reset data byte counter (DBCH bits 4-7 should be set to 1) */ + cdc.dbc = 0xffff; + + /* clear !DTEN and !DTBSY */ + cdc.ifstat |= (BIT_DTBSY | BIT_DTEN); + + /* clear DSR bit & set EDT bit (SCD register $04) */ + Pico_mcd->s68k_regs[0x04+0] = (Pico_mcd->s68k_regs[0x04+0] & 0x07) | 0x80; + + } else if ((int16)cdc.dbc <= 2) + { + if (cdc.ifstat & BIT_DTEI) { + /* pending Data Transfer End interrupt */ + cdc.ifstat &= ~BIT_DTEI; + + /* Data Transfer End interrupt enabled ? */ + if (cdc.ifctrl & BIT_DTEIEN) + { + /* level 5 interrupt enabled ? 
*/ + if (Pico_mcd->s68k_regs[0x32+1] & PCDS_IEN5) + { + /* update IRQ level */ + elprintf(EL_INTS, "cdc DTE irq 5"); + pcd_irq_s68k(5, 1); + } + } + } + /* set DSR and EDT bit (SCD register $04) */ + Pico_mcd->s68k_regs[0x04+0] = (Pico_mcd->s68k_regs[0x04+0] & 0x07) | 0xc0; + } + + return data; + } + +#ifdef LOG_CDC + error("error reading CDC host (data transfer disabled)\n"); +#endif + return 0xffff; +} + +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/cd/cdd.c b/pico/cd/cdd.c new file mode 100644 index 00000000..89ca183b --- /dev/null +++ b/pico/cd/cdd.c @@ -0,0 +1,1239 @@ +/*************************************************************************************** + * Genesis Plus + * CD drive processor & CD-DA fader + * + * Copyright (C) 2012-2013 Eke-Eke (Genesis Plus GX) + * + * Redistribution and use of this code or any derivative works are permitted + * provided that the following conditions are met: + * + * - Redistributions may not be sold, nor may they be used in a commercial + * product or activity. + * + * - Redistributions that are modified from the original source must include the + * complete source code, including the source code for all components used by a + * binary built from the modified sources. However, as a special exception, the + * source code distributed need not include anything that is normally distributed + * (in either source or binary form) with the major components (compiler, kernel, + * and so on) of the operating system on which the executable runs, unless that + * component itself accompanies the executable. + * + * - Redistributions must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************************/ + +#include "../pico_int.h" +#include "genplus_macros.h" +#include "cd_parse.h" +#include "cdd.h" + +#ifdef USE_LIBTREMOR +#define SUPPORTED_EXT 20 +#else +#define SUPPORTED_EXT 10 +#endif + +cdd_t cdd; + +#define is_audio(index) \ + (cdd.toc.tracks[index].type & CT_AUDIO) + +/* BCD conversion lookup tables */ +static const uint8 lut_BCD_8[100] = +{ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, +}; + +static const uint16 lut_BCD_16[100] = +{ + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 
0x0009, + 0x0100, 0x0101, 0x0102, 0x0103, 0x0104, 0x0105, 0x0106, 0x0107, 0x0108, 0x0109, + 0x0200, 0x0201, 0x0202, 0x0203, 0x0204, 0x0205, 0x0206, 0x0207, 0x0208, 0x0209, + 0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306, 0x0307, 0x0308, 0x0309, + 0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, + 0x0500, 0x0501, 0x0502, 0x0503, 0x0504, 0x0505, 0x0506, 0x0507, 0x0508, 0x0509, + 0x0600, 0x0601, 0x0602, 0x0603, 0x0604, 0x0605, 0x0606, 0x0607, 0x0608, 0x0609, + 0x0700, 0x0701, 0x0702, 0x0703, 0x0704, 0x0705, 0x0706, 0x0707, 0x0708, 0x0709, + 0x0800, 0x0801, 0x0802, 0x0803, 0x0804, 0x0805, 0x0806, 0x0807, 0x0808, 0x0809, + 0x0900, 0x0901, 0x0902, 0x0903, 0x0904, 0x0905, 0x0906, 0x0907, 0x0908, 0x0909, +}; + +/* pre-build TOC */ +static const uint16 toc_snatcher[21] = +{ + 56014, 495, 10120, 20555, 1580, 5417, 12502, 16090, 6553, 9681, + 8148, 20228, 8622, 6142, 5858, 1287, 7424, 3535, 31697, 2485, + 31380 +}; + +static const uint16 toc_lunar[52] = +{ + 5422, 1057, 7932, 5401, 6380, 6592, 5862, 5937, 5478, 5870, + 6673, 6613, 6429, 4996, 4977, 5657, 3720, 5892, 3140, 3263, + 6351, 5187, 3249, 1464, 1596, 1750, 1751, 6599, 4578, 5205, + 1550, 1827, 2328, 1346, 1569, 1613, 7199, 4928, 1656, 2549, + 1875, 3901, 1850, 2399, 2028, 1724, 4889, 14551, 1184, 2132, + 685, 3167 +}; + +static const uint32 toc_shadow[15] = +{ + 10226, 70054, 11100, 12532, 12444, 11923, 10059, 10167, 10138, 13792, + 11637, 2547, 2521, 3856, 900 +}; + +static const uint32 toc_dungeon[13] = +{ + 2250, 22950, 16350, 24900, 13875, 19950, 13800, 15375, 17400, 17100, + 3325, 6825, 25275 +}; + +static const uint32 toc_ffight[26] = +{ + 11994, 9742, 10136, 9685, 9553, 14588, 9430, 8721, 9975, 9764, + 9704, 12796, 585, 754, 951, 624, 9047, 1068, 817, 9191, 1024, + 14562, 10320, 8627, 3795, 3047 +}; + +static const uint32 toc_ffightj[29] = +{ + 11994, 9752, 10119, 9690, 9567, 14575, 9431, 8731, 9965, 9763, + 9716, 12791, 579, 751, 958, 630, 9050, 1052, 825, 9193, 1026, + 
14553, 9834, 10542, 1699, 1792, 1781, 3783, 3052 +}; + +#if 0 +/* supported WAVE file header (16-bit stereo samples @44.1kHz) */ +static const unsigned char waveHeader[32] = +{ + 0x57,0x41,0x56,0x45,0x66,0x6d,0x74,0x20,0x10,0x00,0x00,0x00,0x01,0x00,0x02,0x00, + 0x44,0xac,0x00,0x00,0x10,0xb1,0x02,0x00,0x04,0x00,0x10,0x00,0x64,0x61,0x74,0x61 +}; +#endif + +#ifdef USE_LIBTREMOR +#ifdef DISABLE_MANY_OGG_OPEN_FILES +static void ogg_free(int i) +{ + /* clear OGG file descriptor to prevent file from being closed */ + cdd.toc.tracks[i].vf.datasource = NULL; + + /* close VORBIS file structure */ + ov_clear(&cdd.toc.tracks[i].vf); + + /* indicates that the track is a seekable VORBIS file */ + cdd.toc.tracks[i].vf.seekable = 1; + + /* reset file reading position */ + fseek(cdd.toc.tracks[i].fd, 0, SEEK_SET); +} +#endif +#endif + +void cdd_reset(void) +{ + /* reset cycle counter */ + cdd.cycles = 0; + + /* reset drive access latency */ + cdd.latency = 0; + + /* reset track index */ + cdd.index = -1; + + /* reset logical block address */ + cdd.lba = 0; + + /* reset status */ + cdd.status = NO_DISC; + + /* reset CD-DA fader (full volume) */ + cdd.volume = 0x400; + + /* clear CD-DA output */ + cdd.audio[0] = cdd.audio[1] = 0; +} + +/* FIXME: use cdd_read_audio() instead */ +static void cdd_change_track(int index, int lba) +{ + int i, base, lba_offset, lb_len; + + for (i = index; i >= 0; i--) + if (cdd.toc.tracks[i].fd != NULL) + break; + + Pico_mcd->cdda_stream = cdd.toc.tracks[i].fd; + base = cdd.toc.tracks[index].offset; + lba_offset = lba - cdd.toc.tracks[index].start; + lb_len = cdd.toc.tracks[index].end - cdd.toc.tracks[index].start; + + elprintf(EL_CD, "play #%d lba %d base %d", index, lba, base); + + cdda_start_play(base, lba_offset, lb_len); +} + +static off_t read_pos = -1; + +void cdd_seek(int index, int lba) +{ + int aindex = (index < 0 ? 
-index : index); + +#ifdef USE_LIBTREMOR +#ifdef DISABLE_MANY_OGG_OPEN_FILES + /* check if track index has changed */ + if (index != cdd.index) + { + /* close previous track VORBIS file structure to save memory */ + if (cdd.index >= 0 && cdd.toc.tracks[cdd.index].vf.datasource) + { + ogg_free(cdd.index); + } + + /* open current track VORBIS file */ + if (cdd.toc.tracks[aindex].vf.seekable) + { + ov_open(cdd.toc.tracks[aindex].fd,&cdd.toc.tracks[aindex].vf,0,0); + } + } +#endif +#endif + + /* update current track index and LBA */ + cdd.index = aindex; + cdd.lba = lba; + + /* stay within track limits when seeking files */ + if (lba < cdd.toc.tracks[cdd.index].start) + { + lba = cdd.toc.tracks[cdd.index].start; + } + + /* seek to current block */ + if (!is_audio(cdd.index)) + { + /* DATA track */ + read_pos = lba * cdd.sectorSize; + pm_seek(cdd.toc.tracks[cdd.index].fd, read_pos, SEEK_SET); + } +#ifdef USE_LIBTREMOR + else if (cdd.toc.tracks[cdd.index].vf.seekable) + { + /* VORBIS AUDIO track */ + ov_pcm_seek(&cdd.toc.tracks[cdd.index].vf, (lba - cdd.toc.tracks[cdd.index].start) * 588 - cdd.toc.tracks[cdd.index].offset); + } +#endif +#if 0 + else if (cdd.toc.tracks[cdd.index].fd) + { + /* PCM AUDIO track */ + fseek(cdd.toc.tracks[cdd.index].fd, (lba * 2352) - cdd.toc.tracks[cdd.index].offset, SEEK_SET); + } +#else + else + { + cdd_change_track(cdd.index, lba); + } +#endif +} + +int cdd_context_save(uint8 *state) +{ + int bufferptr = 0; + + save_param(&cdd.cycles, sizeof(cdd.cycles)); + save_param(&cdd.latency, sizeof(cdd.latency)); + save_param(&cdd.index, sizeof(cdd.index)); + save_param(&cdd.lba, sizeof(cdd.lba)); + save_param(&cdd.scanOffset, sizeof(cdd.scanOffset)); + save_param(&cdd.volume, sizeof(cdd.volume)); + save_param(&cdd.status, sizeof(cdd.status)); + + return bufferptr; +} + +int cdd_context_load(uint8 *state) +{ + int bufferptr = 0; + + load_param(&cdd.cycles, sizeof(cdd.cycles)); + load_param(&cdd.latency, sizeof(cdd.latency)); + load_param(&cdd.index, 
sizeof(cdd.index)); + load_param(&cdd.lba, sizeof(cdd.lba)); + load_param(&cdd.scanOffset, sizeof(cdd.scanOffset)); + load_param(&cdd.volume, sizeof(cdd.volume)); + load_param(&cdd.status, sizeof(cdd.status)); + + /* seek to current track position */ + cdd_seek(-cdd.index, cdd.lba); + + return bufferptr; +} + +int cdd_context_load_old(uint8 *state) +{ + memcpy(&cdd.lba, state + 8, sizeof(cdd.lba)); + cdd_seek(-cdd.index, cdd.lba); + + return 12 * 4; +} + +int cdd_load(const char *filename, int type) +{ + char header[0x210]; + int ret; + + /* first unmount any loaded disc */ + cdd_unload(); + + /* genplus parses cue here, in PD we use our own parser */ + ret = load_cd_image(filename, &type); + if (ret != 0) + return ret; + + /* read first 16 bytes */ + pm_read(header, 0x10, cdd.toc.tracks[0].fd); + + /* look for valid CD image ID string */ + if (memcmp("SEGADISCSYSTEM", header, 14)) + { + /* if not found, read next 16 bytes */ + pm_read(header, 0x10, cdd.toc.tracks[0].fd); + + /* look again for valid CD image ID string */ + if (memcmp("SEGADISCSYSTEM", header, 14)) + { + elprintf(EL_STATUS|EL_ANOMALY, "cd: bad cd image?"); + /* assume bin without security code */ + } + + /* BIN format (2352 bytes data blocks) */ + cdd.sectorSize = 2352; + } + else + { + /* ISO format (2048 bytes data blocks) */ + cdd.sectorSize = 2048; + } + + ret = (type == CT_ISO ? 
2048 : 2352); + if (ret != cdd.sectorSize) + elprintf(EL_STATUS|EL_ANOMALY, "cd: type detection mismatch"); + pm_sectorsize(cdd.sectorSize, cdd.toc.tracks[0].fd); + + /* read CD image header + security code */ + pm_read(header + 0x10, 0x200, cdd.toc.tracks[0].fd); + + /* Simulate audio tracks if none found */ + if (cdd.toc.last == 1) + { + /* Some games require exact TOC infos */ + if (strstr(header + 0x180,"T-95035") != NULL) + { + /* Snatcher */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_snatcher[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 21); + } + else if (strstr(header + 0x180,"T-127015") != NULL) + { + /* Lunar - The Silver Star */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_lunar[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 52); + } + else if (strstr(header + 0x180,"T-113045") != NULL) + { + /* Shadow of the Beast II */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_shadow[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 15); + } + else if (strstr(header + 0x180,"T-143025") != NULL) + { + /* Dungeon Explorer */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_dungeon[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 13); + } + else if (strstr(header + 0x180,"MK-4410") != NULL) + { + /* Final Fight CD (USA, 
Europe) */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_ffight[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 26); + } + else if (strstr(header + 0x180,"G-6013") != NULL) + { + /* Final Fight CD (Japan) */ + cdd.toc.last = cdd.toc.end = 0; + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + toc_ffightj[cdd.toc.last]; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while (cdd.toc.last < 29); + } +#if 0 + else + { + /* default TOC (99 tracks & 2s per audio tracks) */ + do + { + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end + 2*75; + cdd.toc.tracks[cdd.toc.last].end = cdd.toc.tracks[cdd.toc.last].start + 2*75; + cdd.toc.end = cdd.toc.tracks[cdd.toc.last].end; + cdd.toc.last++; + } + while ((cdd.toc.last < 99) && (cdd.toc.end < 56*60*75)); + } +#endif + } + + /* Lead-out */ + cdd.toc.tracks[cdd.toc.last].start = cdd.toc.end; + + /* CD loaded */ + cdd.loaded = 1; + + /* disc not scanned yet */ + cdd.status = NO_DISC; + + return 0; +} + +int cdd_unload(void) +{ + int was_loaded = cdd.loaded; + + if (cdd.loaded) + { + int i; + + /* stop audio streaming */ + Pico_mcd->cdda_stream = NULL; + + /* close CD tracks */ + if (cdd.toc.tracks[0].fd) + { + pm_close(cdd.toc.tracks[0].fd); + cdd.toc.tracks[0].fd = NULL; + if (cdd.toc.tracks[0].fname) + free(cdd.toc.tracks[0].fname); + cdd.toc.tracks[0].fname = NULL; + } + + for (i = 1; i < cdd.toc.last; i++) + { +#ifdef USE_LIBTREMOR + if (cdd.toc.tracks[i].vf.datasource) + { + /* close VORBIS file (if still opened) */ + ov_clear(&cdd.toc.tracks[i].vf); + } + else +#endif + if (cdd.toc.tracks[i].fd) + { + /* close file */ + if (Pico_mcd->cdda_type == CT_MP3) + fclose(cdd.toc.tracks[i].fd); + else + pm_close(cdd.toc.tracks[i].fd); + 
cdd.toc.tracks[i].fd = NULL; + if (cdd.toc.tracks[i].fname) + free(cdd.toc.tracks[i].fname); + cdd.toc.tracks[i].fname = NULL; + + /* detect single file images */ + if (cdd.toc.tracks[i+1].fd == cdd.toc.tracks[i].fd) + { + /* exit loop */ + i = cdd.toc.last; + } + } + } + + /* CD unloaded */ + cdd.loaded = 0; + + if (cdd.status != CD_OPEN) + cdd.status = NO_DISC; + } + + /* reset TOC */ + memset(&cdd.toc, 0x00, sizeof(cdd.toc)); + + /* unknown CD image file format */ + cdd.sectorSize = 0; + + return was_loaded; +} + +void cdd_read_data(uint8 *dst) +{ + /* only read DATA track sectors */ + if (!is_audio(cdd.index) && (cdd.lba >= cdd.toc.tracks[cdd.index].start) && + (cdd.lba < cdd.toc.tracks[cdd.index].end)) + { + off_t pos; + + /* BIN format ? */ + if (cdd.sectorSize == 2352) + { + /* skip 16-byte header */ + pos = cdd.lba * 2352 + 16; + } + else + { + pos = cdd.lba * cdd.sectorSize; + } + + if (pos != read_pos) { + pm_seek(cdd.toc.tracks[cdd.index].fd, pos, SEEK_SET); + read_pos = pos; + } + + /* read sector data (Mode 1 = 2048 bytes) */ + read_pos += pm_read(dst, 2048, cdd.toc.tracks[cdd.index].fd); + } +} + +#if 0 +void cdd_read_audio(unsigned int samples) +{ + /* previous audio outputs */ + int16 l = cdd.audio[0]; + int16 r = cdd.audio[1]; + + /* get number of internal clocks (samples) needed */ + samples = blip_clocks_needed(blip[0], samples); + + /* audio track playing ? 
*/ + if (!Pico_mcd->s68k_regs[0x36+0] && cdd.toc.tracks[cdd.index].fd) + { + int i, mul, delta; + + /* current CD-DA fader volume */ + int curVol = cdd.volume; + + /* CD-DA fader volume setup (0-1024) */ + int endVol = Pico_mcd->regs[0x34>>1].w >> 4; + + /* read samples from current block */ +#ifdef USE_LIBTREMOR + if (cdd.toc.tracks[cdd.index].vf.datasource) + { + int len, done = 0; + int16 *ptr = (int16 *) (cdc.ram); + samples = samples * 4; + while (done < samples) + { + len = ov_read(&cdd.toc.tracks[cdd.index].vf, (char *)(cdc.ram + done), samples - done, 0); + if (len <= 0) + { + done = samples; + break; + } + done += len; + } + samples = done / 4; + + /* process 16-bit (host-endian) stereo samples */ + for (i=0; i endVol) + { + /* fade-out */ + curVol--; + } + else if (!curVol) + { + /* audio will remain muted until next setup */ + break; + } + } + } + else +#endif + { +#ifdef LSB_FIRST + int16 *ptr = (int16 *) (cdc.ram); +#else + uint8 *ptr = cdc.ram; +#endif + fread(cdc.ram, 1, samples * 4, cdd.toc.tracks[cdd.index].fd); + + /* process 16-bit (little-endian) stereo samples */ + for (i=0; i endVol) + { + /* fade-out */ + curVol--; + } + else if (!curVol) + { + /* audio will remain muted until next setup */ + break; + } + } + } + + /* save current CD-DA fader volume */ + cdd.volume = curVol; + + /* save last audio output for next frame */ + cdd.audio[0] = l; + cdd.audio[1] = r; + } + else + { + /* no audio output */ + if (l) blip_add_delta_fast(blip[0], 0, -l); + if (r) blip_add_delta_fast(blip[1], 0, -r); + + /* save audio output for next frame */ + cdd.audio[0] = 0; + cdd.audio[1] = 0; + } + + /* end of Blip Buffer timeframe */ + blip_end_frame(blip[0], samples); + blip_end_frame(blip[1], samples); +} +#endif + + +void cdd_update(void) +{ +#ifdef LOG_CDD + error("LBA = %d (track n%d)(latency=%d)\n", cdd.lba, cdd.index, cdd.latency); +#endif + + /* drive latency */ + if (cdd.latency > 0) + { + cdd.latency--; + return; + } + + /* reading disc */ + if 
(cdd.status == CD_PLAY) + { + if (cdd.index >= cdd.toc.last) + { + /* end of disc */ + cdd.status = CD_END; + return; + } + + /* track type */ + if (!is_audio(cdd.index)) + { + /* DATA sector header (CD-ROM Mode 1) */ + uint8 header[4]; + uint32 msf = cdd.lba + 150; + header[0] = lut_BCD_8[(msf / 75) / 60]; + header[1] = lut_BCD_8[(msf / 75) % 60]; + header[2] = lut_BCD_8[(msf % 75)]; + header[3] = 0x01; + + /* data track sector read is controlled by CDC */ + cdc_decoder_update(header); + } + else + { + uint8 header[4] = { 0, }; + + /* check against audio track start index */ + if (cdd.lba >= cdd.toc.tracks[cdd.index].start) + { + /* audio track playing */ + Pico_mcd->s68k_regs[0x36+0] = 0x00; + } + + /* audio blocks are still sent to CDC as well as CD DAC/Fader */ + cdc_decoder_update(header); + } + + /* next block is automatically read */ + cdd.lba++; + + /* check end of current track */ + if (cdd.lba >= cdd.toc.tracks[cdd.index].end) + { + /* PAUSE between tracks */ + Pico_mcd->s68k_regs[0x36+0] = 0x01; + + /* seek to next audio track start */ + cdd_seek(cdd.index + 1, cdd.lba); + } + } + + /* scanning disc */ + else if (cdd.status == CD_SCAN) + { + /* fast-forward or fast-rewind */ + cdd.lba += cdd.scanOffset; + + /* check current track limits */ + if (cdd.lba >= cdd.toc.tracks[cdd.index].end) + { + /* next track */ + if (cdd.index >= cdd.toc.last) + { + /* no AUDIO track playing */ + Pico_mcd->s68k_regs[0x36+0] = 0x01; + + /* end of disc */ + cdd.lba = cdd.toc.end; + cdd.status = CD_END; + } + else + { + cdd_seek(cdd.index + 1, cdd.toc.tracks[cdd.index+1].start); + + /* AUDIO track playing ? 
*/ + if (cdd.status == CD_PLAY && is_audio(cdd.index)) + { + Pico_mcd->s68k_regs[0x36+0] = 0x00; + } + } + } + else if (cdd.lba < cdd.toc.tracks[cdd.index].start) + { + /* previous track */ + if (cdd.index <= 0) + { + cdd_seek(0, 0); + } + else + { + cdd_seek(cdd.index - 1, cdd.toc.tracks[cdd.index-1].end); + } + } + + if (!is_audio(cdd.index)) + { + /* no AUDIO track playing */ + Pico_mcd->s68k_regs[0x36+0] = 0x01; + } + } + + if (Pico_mcd->m.state_flags & PCD_ST_CDD_CMD) { + /* pending delayed command */ + cdd_process(); + Pico_mcd->m.state_flags &= ~PCD_ST_CDD_CMD; + } +} + +#define set_reg16(r, v) { \ + uint16 _v = v; \ + Pico_mcd->s68k_regs[(r)] = _v >> 8; \ + Pico_mcd->s68k_regs[(r)+1] = _v; \ +} + +void cdd_process(void) +{ + /* Process CDD command */ + switch (Pico_mcd->s68k_regs[0x42+0] & 0x0f) + { + case 0x00: /* Drive Status */ + { + if (cdd.latency == 0) { + /* RS1-RS8 normally unchanged */ + Pico_mcd->s68k_regs[0x38+0] = cdd.status; + + /* unless RS1 indicated invalid track infos */ + if (Pico_mcd->s68k_regs[0x38+1] == 0x0f || + Pico_mcd->s68k_regs[0x38+1] == 0x00 || + Pico_mcd->s68k_regs[0x38+1] == 0x01) + { + int lba = cdd.lba + 150 - cdd.latency; + if (Pico_mcd->s68k_regs[0x38+1] == 0x01) + lba = abs(cdd.lba - cdd.toc.tracks[cdd.index].start); + if (Pico_mcd->s68k_regs[0x38+1] == 0x0f) + Pico_mcd->s68k_regs[0x38+1] = 0x00; + set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); + set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); + set_reg16(0x3e, lut_BCD_16[(lba%75)]); + Pico_mcd->s68k_regs[0x40+0] = is_audio(cdd.index) ? 0x00 : 0x04; + } else if (Pico_mcd->s68k_regs[0x38+1] == 0x02) { + /* then return valid track infos, e.g current track number in RS2-RS3 (fixes Lunar - The Silver Star) */ + Pico_mcd->s68k_regs[0x38+1] = 0x02; + set_reg16(0x3a, (cdd.index < cdd.toc.last) ? lut_BCD_16[cdd.index + 1] : 0x0A0A); + } + } + break; + } + + case 0x01: /* Stop Drive */ + { + /* update status */ + cdd.status = cdd.loaded ? 
CD_STOP : NO_DISC; + + /* no audio track playing */ + Pico_mcd->s68k_regs[0x36+0] = 0x01; + + /* RS1-RS8 ignored, expects 0x0 ("no disc" ?) in RS0 once */ + set_reg16(0x38, 0x0000); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + set_reg16(0x40, 0x000f); + return; + } + + case 0x02: /* Read TOC */ + { + if (cdd.status == NO_DISC) + cdd.status = cdd.loaded ? CD_STOP : NO_DISC; + + /* Infos automatically retrieved by CDD processor from Q-Channel */ + /* commands 0x00-0x02 (current block) and 0x03-0x05 (Lead-In) */ + switch (Pico_mcd->s68k_regs[0x44+1]) + { + case 0x00: /* Current Absolute Time (MM:SS:FF) */ + { + int lba = cdd.lba + 150; + set_reg16(0x38, cdd.status << 8); + set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); + set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); + set_reg16(0x3e, lut_BCD_16[(lba%75)]); + Pico_mcd->s68k_regs[0x40+0] = is_audio(cdd.index) ? 0x00 : 0x04; /* Current block flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ + break; + } + + case 0x01: /* Current Track Relative Time (MM:SS:FF) */ + { + int lba = cdd.lba - cdd.toc.tracks[cdd.index].start; + if (lba < 0) lba = 0; + set_reg16(0x38, (cdd.status << 8) | 0x01); + set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); + set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); + set_reg16(0x3e, lut_BCD_16[(lba%75)]); + Pico_mcd->s68k_regs[0x40+0] = is_audio(cdd.index) ? 0x00 : 0x04; /* Current block flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ + break; + } + + case 0x02: /* Current Track Number */ + { + set_reg16(0x38, (cdd.status << 8) | 0x02); + set_reg16(0x3a, (cdd.index < cdd.toc.last) ? lut_BCD_16[cdd.index + 1] : 0x0A0A); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); /* Disk Control Code (?) 
in RS6 */ + Pico_mcd->s68k_regs[0x40+0] = 0x00; + break; + } + + case 0x03: /* Total length (MM:SS:FF) */ + { + int lba = cdd.toc.end + 150; + set_reg16(0x38, (cdd.status << 8) | 0x03); + set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); + set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); + set_reg16(0x3e, lut_BCD_16[(lba%75)]); + Pico_mcd->s68k_regs[0x40+0] = 0x00; + break; + } + + case 0x04: /* First & Last Track Numbers */ + { + set_reg16(0x38, (cdd.status << 8) | 0x04); + set_reg16(0x3a, 0x0001); + set_reg16(0x3c, lut_BCD_16[cdd.toc.last]); + set_reg16(0x3e, 0x0000); /* Drive Version (?) in RS6-RS7 */ + Pico_mcd->s68k_regs[0x40+0] = 0x00; /* Lead-In flags in RS8 (bit0 = mute status, bit1: pre-emphasis status, bit2: track type) */ + break; + } + + case 0x05: /* Track Start Time (MM:SS:FF) */ + { + int track = Pico_mcd->s68k_regs[0x46+0] * 10 + Pico_mcd->s68k_regs[0x46+1]; + int lba = cdd.toc.tracks[track-1].start + 150; + set_reg16(0x38, (cdd.status << 8) | 0x05); + set_reg16(0x3a, lut_BCD_16[(lba/75)/60]); + set_reg16(0x3c, lut_BCD_16[(lba/75)%60]); + set_reg16(0x3e, lut_BCD_16[(lba%75)]); + Pico_mcd->s68k_regs[0x40+0] = track % 10; /* Track Number (low digit) */ + if (track == 1) + { + /* RS6 bit 3 is set for the first (DATA) track */ + Pico_mcd->s68k_regs[0x3e + 0] |= 0x08; + } + break; + } + + case 0x06: /* Latest Error Information */ + { + set_reg16(0x38, (cdd.status << 8) | 0x06); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + Pico_mcd->s68k_regs[0x40+0] = 0x00; + break; + } + + default: + { +#ifdef LOG_ERROR + error("Unknown CDD Command %02X (%X)\n", Pico_mcd->s68k_regs[0x44+1], s68k.pc); +#endif + return; + } + } + break; + } + + case 0x03: /* Play */ + { + /* reset track index */ + int index = 0; + + /* new LBA position */ + int lba = ((Pico_mcd->s68k_regs[0x44+0] * 10 + Pico_mcd->s68k_regs[0x44+1]) * 60 + + (Pico_mcd->s68k_regs[0x46+0] * 10 + Pico_mcd->s68k_regs[0x46+1])) * 75 + + (Pico_mcd->s68k_regs[0x48+0] * 10 + 
Pico_mcd->s68k_regs[0x48+1]) - 150; + + /* if drive is currently reading, another block or 2 are decoded before the seek starts */ + if (cdd.status == CD_PLAY && !(Pico_mcd->m.state_flags & PCD_ST_CDD_CMD)) { + Pico_mcd->m.state_flags |= PCD_ST_CDD_CMD; + return; + } + + /* CD drive latency */ + if (!cdd.latency) + { + /* Fixes a few games hanging during intro because they expect data to be read with some delay */ + /* Radical Rex needs at least one interrupt delay */ + /* Wolf Team games (Anet Futatabi, Cobra Command, Road Avenger & Time Gal) need at least 6 interrupts delay */ + /* Space Adventure Cobra (2nd morgue scene) needs at least 13 interrupts delay (incl. seek time, so 6 is OK) */ + /* Jeopardy & ESPN Sunday Night NFL are picky about this as well: 10 interrupts delay (+ seek time) seems OK */ + cdd.latency = 11; + } + + /* CD drive seek time */ + /* max. seek time = 1.5 s = 1.5 x 75 = 112.5 CDD interrupts (rounded to 120) for 270000 sectors max on disc. */ + /* Note: This is only a rough approximation since, on real hardware, seek time is most likely not linear and */ + /* latency much larger than above value, but this model works fine for Sonic CD (track 26 playback needs to */ + /* be delayed enough to start in sync with intro sequence, as compared with real hardware recording). 
*/ + if (lba > cdd.lba) + { + cdd.latency += (((lba - cdd.lba) * 120) / 270000); + } + else + { + cdd.latency += (((cdd.lba - lba) * 120) / 270000); + } + + /* block transfer always starts 3 blocks earlier */ + lba -= 3; + + /* get track index */ + while ((cdd.toc.tracks[index].end <= lba) && (index < cdd.toc.last)) index++; + + /* seek to block */ + cdd_seek(index, lba); + + /* no audio track playing (yet) */ + Pico_mcd->s68k_regs[0x36+0] = 0x01; + + /* update status */ + cdd.status = CD_PLAY; + + /* report CD_SEEK in RS0 with RS1 = 0xF, clear RS2-RS8 and store the precomputed checksum (no track index is returned) */ + set_reg16(0x38, (CD_SEEK << 8) | 0x0f); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + set_reg16(0x40, ~(CD_SEEK + 0xf) & 0x0f); + return; + } + + case 0x04: /* Seek */ + { + /* reset track index */ + int index = 0; + + /* new LBA position */ + int lba = ((Pico_mcd->s68k_regs[0x44+0] * 10 + Pico_mcd->s68k_regs[0x44+1]) * 60 + + (Pico_mcd->s68k_regs[0x46+0] * 10 + Pico_mcd->s68k_regs[0x46+1])) * 75 + + (Pico_mcd->s68k_regs[0x48+0] * 10 + Pico_mcd->s68k_regs[0x48+1]) - 150; + + /* if drive is currently reading, another block or 2 are decoded before the seek starts */ + if (cdd.status == CD_PLAY && !(Pico_mcd->m.state_flags & PCD_ST_CDD_CMD)) { + Pico_mcd->m.state_flags |= PCD_ST_CDD_CMD; + return; + } + + /* CD drive seek time */ + /* We are using similar linear model as above, although still not exactly accurate, */ + /* it works fine for Switch/Panic! intro (Switch needs at least 30 interrupts while */ + /* seeking from 00:05:63 to 24:03:19, Panic! 
when seeking from 00:05:60 to 24:06:07) */ + if (lba > cdd.lba) + { + cdd.latency = ((lba - cdd.lba) * 120) / 270000; + } + else + { + cdd.latency = ((cdd.lba - lba) * 120) / 270000; + } + + /* get track index */ + while ((cdd.toc.tracks[index].end <= lba) && (index < cdd.toc.last)) index++; + + /* seek to block */ + cdd_seek(index, lba); + + /* no audio track playing */ + Pico_mcd->s68k_regs[0x36+0] = 0x01; + + /* update status */ + cdd.status = CD_READY; + + /* unknown RS1-RS8 values (returning 0xF in RS1 invalidates track infos in RS2-RS8 and fixes Final Fight CD intro when seek time is emulated) */ + set_reg16(0x38, (CD_SEEK << 8) | 0x0f); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + set_reg16(0x40, ~(CD_SEEK + 0xf) & 0x0f); + return; + } + + case 0x06: /* Pause */ + { + /* if drive is currently reading, another block or 2 are decoded before the seek starts */ + if (cdd.status == CD_PLAY && !(Pico_mcd->m.state_flags & PCD_ST_CDD_CMD)) { + Pico_mcd->m.state_flags |= PCD_ST_CDD_CMD; + return; + } + + /* no audio track playing */ + Pico_mcd->s68k_regs[0x36+0] = 0x01; + + /* update status (RS1-RS8 unchanged) */ + cdd.status = Pico_mcd->s68k_regs[0x38+0] = CD_READY; + break; + } + + case 0x07: /* Resume */ + { + int lba = (cdd.lba < 0 ? 
0 : cdd.lba); + + /* CD drive latency */ + if (!cdd.latency) + { + cdd.latency = 11; + } + + /* always restart 4 blocks earlier */ + cdd_seek(cdd.index, lba - 4); + + /* update status (RS1-RS8 unchanged) */ + cdd.status = Pico_mcd->s68k_regs[0x38+0] = CD_PLAY; + break; + } + + case 0x08: /* Forward Scan */ + { + /* reset scanning direction / speed */ + cdd.scanOffset = CD_SCAN_SPEED; + + /* update status (RS1-RS8 unchanged) */ + cdd.status = Pico_mcd->s68k_regs[0x38+0] = CD_SCAN; + break; + } + + case 0x09: /* Rewind Scan */ + { + /* reset scanning direction / speed */ + cdd.scanOffset = -CD_SCAN_SPEED; + + /* update status (RS1-RS8 unchanged) */ + cdd.status = Pico_mcd->s68k_regs[0x38+0] = CD_SCAN; + break; + } + + + case 0x0a: /* N-Track Jump Control ? (usually sent before CD_SEEK or CD_PLAY commands) */ + { + /* TC3 corresponds to seek direction (00=forward, FF=reverse) */ + /* TC4-TC7 are related to seek length (4x4 bits i.e. parameter values are between -65535 and +65535) */ + /* Maybe related to number of auto-sequenced track jumps/moves for CD DSP (cf. CXD2500BQ datasheet) */ + /* also see US Patent nr. 5222054 for a detailed description of seeking operation using Track Jump */ + + /* no audio track playing */ + Pico_mcd->s68k_regs[0x36+0] = 0x01; + + /* update status (RS1-RS8 unchanged) */ + cdd.status = Pico_mcd->s68k_regs[0x38+0] = CD_READY; + break; + } + + case 0x0c: /* Close Tray */ + { + /* no audio track playing */ + Pico_mcd->s68k_regs[0x36+0] = 0x01; + + /* update status */ + cdd.status = cdd.loaded ? CD_STOP : NO_DISC; + + /* RS1-RS8 ignored, expects 0x0 ("no disc" ?) 
in RS0 once */ + set_reg16(0x38, 0x0000); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + set_reg16(0x40, 0x000f); + + if (PicoIn.mcdTrayClose) + PicoIn.mcdTrayClose(); + + return; + } + + case 0x0d: /* Open Tray */ + { + /* no audio track playing */ + Pico_mcd->s68k_regs[0x36+0] = 0x01; + + /* update status (RS1-RS8 ignored) */ + cdd.status = CD_OPEN; + set_reg16(0x38, CD_OPEN << 8); + set_reg16(0x3a, 0x0000); + set_reg16(0x3c, 0x0000); + set_reg16(0x3e, 0x0000); + set_reg16(0x40, ~CD_OPEN & 0x0f); + + if (PicoIn.mcdTrayOpen) + PicoIn.mcdTrayOpen(); + return; + } + + default: /* Unknown command */ +#ifdef LOG_CDD + error("Unknown CDD Command !!!\n"); +#endif + Pico_mcd->s68k_regs[0x38+0] = cdd.status; + break; + } + + /* only compute checksum when necessary */ + Pico_mcd->s68k_regs[0x40 + 1] = + ~(Pico_mcd->s68k_regs[0x38 + 0] + Pico_mcd->s68k_regs[0x38 + 1] + + Pico_mcd->s68k_regs[0x3a + 0] + Pico_mcd->s68k_regs[0x3a + 1] + + Pico_mcd->s68k_regs[0x3c + 0] + Pico_mcd->s68k_regs[0x3c + 1] + + Pico_mcd->s68k_regs[0x3e + 0] + Pico_mcd->s68k_regs[0x3e + 1] + + Pico_mcd->s68k_regs[0x40 + 0]) & 0x0f; +} + +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/cd/cdd.h b/pico/cd/cdd.h new file mode 100644 index 00000000..d896e256 --- /dev/null +++ b/pico/cd/cdd.h @@ -0,0 +1,100 @@ +/*************************************************************************************** + * Genesis Plus + * CD drive processor & CD-DA fader + * + * Copyright (C) 2012-2013 Eke-Eke (Genesis Plus GX) + * + * Redistribution and use of this code or any derivative works are permitted + * provided that the following conditions are met: + * + * - Redistributions may not be sold, nor may they be used in a commercial + * product or activity. + * + * - Redistributions that are modified from the original source must include the + * complete source code, including the source code for all components used by a + * binary built from the modified sources. 
However, as a special exception, the + * source code distributed need not include anything that is normally distributed + * (in either source or binary form) with the major components (compiler, kernel, + * and so on) of the operating system on which the executable runs, unless that + * component itself accompanies the executable. + * + * - Redistributions must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other + * materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ****************************************************************************************/ +#ifndef _HW_CDD_ +#define _HW_CDD_ + +#ifdef USE_LIBTREMOR +#include "tremor/ivorbisfile.h" +#endif + +/* CDD status */ +#define NO_DISC 0x00 +#define CD_PLAY 0x01 +#define CD_SEEK 0x02 +#define CD_SCAN 0x03 +#define CD_READY 0x04 +#define CD_OPEN 0x05 /* similar to 0x0E ? 
*/ +#define CD_STOP 0x09 +#define CD_END 0x0C + +/* CD blocks scanning speed */ +#define CD_SCAN_SPEED 30 + +#define CD_MAX_TRACKS 100 + +/* CD track */ +typedef struct +{ + int type; + char *fname; + void *fd; +#ifdef USE_LIBTREMOR + OggVorbis_File vf; +#endif + int offset; + int start; + int end; +} track_t; + +/* CD TOC */ +typedef struct +{ + int end; + int last; + track_t tracks[CD_MAX_TRACKS]; +} toc_t; + +/* CDD hardware */ +typedef struct +{ + uint32 cycles; + uint32 latency; + int loaded; + int index; + int lba; + int scanOffset; + int volume; + uint8 status; + uint16 sectorSize; + toc_t toc; + int16 audio[2]; +} cdd_t; + +extern cdd_t cdd; + +#endif diff --git a/pico/cd/cue.h b/pico/cd/cue.h deleted file mode 100644 index 70ade53d..00000000 --- a/pico/cd/cue.h +++ /dev/null @@ -1,29 +0,0 @@ - -typedef enum -{ - CT_UNKNOWN = 0, - CT_ISO = 1, /* 2048 B/sector */ - CT_BIN = 2, /* 2352 B/sector */ - CT_MP3 = 3, - CT_WAV = 4 -} cue_track_type; - -typedef struct -{ - char *fname; - int pregap; /* pregap for current track */ - int sector_offset; /* in current file */ - int sector_xlength; - cue_track_type type; -} cue_track; - -typedef struct -{ - int track_count; - cue_track tracks[0]; -} cue_data_t; - - -cue_data_t *cue_parse(const char *fname); -void cue_destroy(cue_data_t *data); - diff --git a/pico/cd/genplus_macros.h b/pico/cd/genplus_macros.h index 8ac5d35b..a665e5c2 100644 --- a/pico/cd/genplus_macros.h +++ b/pico/cd/genplus_macros.h @@ -5,15 +5,15 @@ #undef int16 #undef int32 -#define uint8 unsigned char -#define uint16 unsigned short -#define uint32 unsigned int -#define int8 signed char -#define int16 signed short -#define int32 signed int +#define uint8 u8 +#define uint16 u16 +#define uint32 u32 +#define int8 s8 +#define int16 s16 +#define int32 s32 -#define READ_BYTE(BASE, ADDR) (BASE)[(ADDR)^1] -#define WRITE_BYTE(BASE, ADDR, VAL) (BASE)[(ADDR)^1] = (VAL) +#define READ_BYTE(BASE, ADDR) (BASE)[MEM_BE2(ADDR)] +#define WRITE_BYTE(BASE, ADDR, VAL) 
(BASE)[MEM_BE2(ADDR)] = (VAL) #define load_param(param, size) \ memcpy(param, &state[bufferptr], size); \ diff --git a/pico/cd/gfx.c b/pico/cd/gfx.c index 948402bd..4df1c34f 100644 --- a/pico/cd/gfx.c +++ b/pico/cd/gfx.c @@ -43,6 +43,7 @@ typedef struct //uint32 cycles; /* current cycles count for graphics operation */ //uint32 cyclesPerLine; /* current graphics operation timings */ uint32 dotMask; /* stamp map size mask */ + uint32 stampMask; /* stamp number mask */ uint16 *tracePtr; /* trace vector pointer */ uint16 *mapPtr; /* stamp map table base address */ uint8 stampShift; /* stamp pixel shift value (related to stamp size) */ @@ -52,7 +53,8 @@ typedef struct uint32 y_step; /* pico: render line step */ uint8 lut_prio[4][0x10][0x10]; /* WORD-RAM data writes priority lookup table */ uint8 lut_pixel[0x200]; /* Graphics operation dot offset lookup table */ - uint8 lut_cell[0x100]; /* Graphics operation stamp offset lookup table */ + uint16 lut_cell2[0x80]; /* Graphics operation stamp offset lookup table */ + uint16 lut_cell4[0x80]; /* Graphics operation stamp offset lookup table */ } gfx_t; static gfx_t gfx; @@ -66,7 +68,7 @@ static void gfx_schedule(void); void gfx_init(void) { int i, j; - uint8 mask, row, col, temp; + uint8 row, col, temp; memset(&gfx, 0, sizeof(gfx)); @@ -87,24 +89,23 @@ void gfx_init(void) } /* Initialize cell lookup table */ - /* table entry = yyxxshrr (8 bits) */ + /* table entry = yyxxhrr (7 bits) */ /* with: yy = cell row (0-3) */ /* xx = cell column (0-3) */ - /* s = stamp size (0=16x16, 1=32x32) */ /* hrr = HFLIP & ROTATION bits */ - for (i=0; i<0x100; i++) + for (i=0; i<0x80; i++) { /* one stamp = 2x2 cells (16x16) or 4x4 cells (32x32) */ - mask = (i & 8) ? 
3 : 1; - row = (i >> 6) & mask; - col = (i >> 4) & mask; + row = (i >> 5) & 3; + col = (i >> 3) & 3; - if (i & 4) { col = col ^ mask; } /* HFLIP (always first) */ - if (i & 2) { col = col ^ mask; row = row ^ mask; } /* ROLL1 */ - if (i & 1) { temp = col; col = row ^ mask; row = temp; } /* ROLL0 */ + if (i & 4) { col = col ^ 3; } /* HFLIP (always first) */ + if (i & 2) { col = col ^ 3; row = row ^ 3; } /* ROLL1 */ + if (i & 1) { temp = col; col = row ^ 3; row = temp; } /* ROLL0 */ /* cell offset (0-3 or 0-15) */ - gfx.lut_cell[i] = row + col * (mask + 1); + gfx.lut_cell2[i] = ((row&1) + (col&1) * 2) << 6; + gfx.lut_cell4[i] = ((row&3) + (col&3) * 4) << 6; } /* Initialize pixel lookup table */ @@ -175,12 +176,136 @@ int gfx_context_load(const uint8 *state) return bufferptr; } +static inline int gfx_pixel(uint32 xpos, uint32 ypos, uint16 *lut_cell) +{ + uint16 stamp_data; + uint32 stamp_index; + uint8 pixel_out = 0x00; + + /* check if pixel is outside stamp map */ + if (((xpos | ypos) & ~gfx.dotMask) == 0) + { + /* read stamp map table data */ + stamp_data = gfx.mapPtr[(xpos >> gfx.stampShift) | ((ypos >> gfx.stampShift) << gfx.mapShift)]; + + /* stamp generator base index */ + /* sss ssssssss ccyyyxxx (16x16) or sss sssssscc ccyyyxxx (32x32) */ + /* with: s = stamp number (1 stamp = 16x16 or 32x32 pixels) */ + /* c = cell offset (0-3 for 16x16, 0-15 for 32x32) */ + /* yyy = line offset (0-7) */ + /* xxx = pixel offset (0-7) */ + stamp_index = (stamp_data & gfx.stampMask) << 8; + + if (stamp_index) + { + /* extract HFLIP & ROTATION bits */ + stamp_data = (stamp_data >> 13) & 7; + + /* cell offset (0-3 or 0-15) */ + /* table entry = yyxxhrr (7 bits) */ + /* with: yy = cell row (0-3) = (ypos >> (11 + 3)) & 3 */ + /* xx = cell column (0-3) = (xpos >> (11 + 3)) & 3 */ + /* hrr = HFLIP & ROTATION bits */ + stamp_index |= lut_cell[stamp_data | ((ypos >> 9) & 0x60) | ((xpos >> 11) & 0x18)]; + + /* pixel offset (0-63) */ + /* table entry = yyyxxxhrr (9 bits) */ + /* with: yyy 
= pixel row (0-7) = (ypos >> 11) & 7 */ + /* xxx = pixel column (0-7) = (xpos >> 11) & 7 */ + /* hrr = HFLIP & ROTATION bits */ + stamp_index |= gfx.lut_pixel[stamp_data | ((ypos >> 5) & 0x1c0) | ((xpos >> 8) & 0x38)]; + + /* read pixel pair (2 pixels/byte) */ + pixel_out = READ_BYTE(Pico_mcd->word_ram2M, stamp_index >> 1); + + /* extract left or right pixel */ + pixel_out >>= 4 * !(stamp_index & 1); + pixel_out &= 0x0f; + } + } + + return pixel_out; +} + +#define RENDER_LOOP(N, UPDP, COND1, COND2) do { \ + if (bufferIndex & 1) { \ + bufferIndex ^= 1; \ + goto right##N; /* no initial left pixel */ \ + } \ + /* process all dots */ \ + while (width--) \ + { \ + /* left pixel */ \ + xpos &= mask; \ + ypos &= mask; \ + \ + if (COND1) { \ + pixel_out = gfx_pixel(xpos, ypos, lut_cell); \ + UPDP; \ + } \ + \ + if (COND2) { \ + /* read out paired pixel data */ \ + pixel_in = READ_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1); \ + \ + /* priority mode write */ \ + pixel_in = (lut_prio[(pixel_in & 0xf0) >> 4][pixel_out] << 4) | \ + (pixel_in & 0x0f); \ + \ + /* write data to image buffer */ \ + WRITE_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1, pixel_in); \ + } \ + \ + /* increment pixel position */ \ + xpos += xoffset; \ + ypos += yoffset; \ + \ +right##N: \ + if (width-- == 0) break; \ + \ + /* right pixel */ \ + xpos &= mask; \ + ypos &= mask; \ + \ + if (COND1) { \ + pixel_out = gfx_pixel(xpos, ypos, lut_cell); \ + UPDP; \ + } \ + \ + if (COND2) { \ + /* read out paired pixel data */ \ + pixel_in = READ_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1); \ + \ + /* priority mode write */ \ + pixel_in = (lut_prio[pixel_in & 0x0f][pixel_out]) | \ + (pixel_in & 0xf0); \ + \ + /* write data to image buffer */ \ + WRITE_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1, pixel_in); \ + } \ + \ + /* increment pixel position */ \ + xpos += xoffset; \ + ypos += yoffset; \ + \ + /* next pixel */ \ + bufferIndex += 2; \ + /* check current pixel position */ \ + if ((bufferIndex & 7) == 0) \ + { 
\ + /* next cell: increment buffer offset by one column (minus 8 pixels) */ \ + bufferIndex += gfx.bufferOffset-1; \ + } \ + } \ +} while (0) + static void gfx_render(uint32 bufferIndex, uint32 width) { uint8 pixel_in, pixel_out; - uint16 stamp_data; - uint32 stamp_index; uint32 priority; + uint8 (*lut_prio)[0x10]; + uint16 *lut_cell; + uint32 mask; /* pixel map start position for current line (13.3 format converted to 13.11) */ uint32 xpos = *gfx.tracePtr++ << 8; @@ -192,131 +317,35 @@ static void gfx_render(uint32 bufferIndex, uint32 width) priority = (Pico_mcd->s68k_regs[2] << 8) | Pico_mcd->s68k_regs[3]; priority = (priority >> 3) & 0x03; + lut_prio = gfx.lut_prio[priority]; - /* process all dots */ - while (width--) + lut_cell = (Pico_mcd->s68k_regs[0x58+1] & 0x02) ? gfx.lut_cell4 : gfx.lut_cell2; + + /* check if stamp map is repeated */ + mask = 0xffffff; /* 24-bit range */ + if (Pico_mcd->s68k_regs[0x58+1] & 0x01) { - /* check if stamp map is repeated */ - if (Pico_mcd->s68k_regs[0x58+1] & 0x01) - { - /* stamp map range */ - xpos &= gfx.dotMask; - ypos &= gfx.dotMask; - } - else - { - /* 24-bit range */ - xpos &= 0xffffff; - ypos &= 0xffffff; - } + /* stamp map range */ + mask = gfx.dotMask; + } - /* check if pixel is outside stamp map */ - if ((xpos | ypos) & ~gfx.dotMask) - { - /* force pixel output to 0 */ - pixel_out = 0x00; - } - else - { - /* read stamp map table data */ - stamp_data = gfx.mapPtr[(xpos >> gfx.stampShift) | ((ypos >> gfx.stampShift) << gfx.mapShift)]; - - /* stamp generator base index */ - /* sss ssssssss ccyyyxxx (16x16) or sss sssssscc ccyyyxxx (32x32) */ - /* with: s = stamp number (1 stamp = 16x16 or 32x32 pixels) */ - /* c = cell offset (0-3 for 16x16, 0-15 for 32x32) */ - /* yyy = line offset (0-7) */ - /* xxx = pixel offset (0-7) */ - stamp_index = (stamp_data & 0x7ff) << 8; - - if (stamp_index) - { - /* extract HFLIP & ROTATION bits */ - stamp_data = (stamp_data >> 13) & 7; - - /* cell offset (0-3 or 0-15) */ - /* table entry = 
yyxxshrr (8 bits) */ - /* with: yy = cell row (0-3) = (ypos >> (11 + 3)) & 3 */ - /* xx = cell column (0-3) = (xpos >> (11 + 3)) & 3 */ - /* s = stamp size (0=16x16, 1=32x32) */ - /* hrr = HFLIP & ROTATION bits */ - stamp_index |= gfx.lut_cell[ - stamp_data | ((Pico_mcd->s68k_regs[0x58+1] & 0x02) << 2 ) - | ((ypos >> 8) & 0xc0) | ((xpos >> 10) & 0x30)] << 6; - - /* pixel offset (0-63) */ - /* table entry = yyyxxxhrr (9 bits) */ - /* with: yyy = pixel row (0-7) = (ypos >> 11) & 7 */ - /* xxx = pixel column (0-7) = (xpos >> 11) & 7 */ - /* hrr = HFLIP & ROTATION bits */ - stamp_index |= gfx.lut_pixel[stamp_data | ((xpos >> 8) & 0x38) | ((ypos >> 5) & 0x1c0)]; - - /* read pixel pair (2 pixels/byte) */ - pixel_out = READ_BYTE(Pico_mcd->word_ram2M, stamp_index >> 1); - - /* extract left or rigth pixel */ - if (stamp_index & 1) - { - pixel_out &= 0x0f; - } - else - { - pixel_out >>= 4; - } - } - else - { - /* stamp 0 is not used: force pixel output to 0 */ - pixel_out = 0x00; - } - } - - /* read out paired pixel data */ - pixel_in = READ_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1); - - /* update left or rigth pixel */ - if (bufferIndex & 1) - { - /* priority mode write */ - pixel_out = gfx.lut_prio[priority][pixel_in & 0x0f][pixel_out]; - - pixel_out |= (pixel_in & 0xf0); - } - else - { - /* priority mode write */ - pixel_out = gfx.lut_prio[priority][pixel_in >> 4][pixel_out]; - - pixel_out = (pixel_out << 4) | (pixel_in & 0x0f); - } - - /* write data to image buffer */ - WRITE_BYTE(Pico_mcd->word_ram2M, bufferIndex >> 1, pixel_out); - - /* check current pixel position */ - if ((bufferIndex & 7) != 7) - { - /* next pixel */ - bufferIndex++; - } - else - { - /* next cell: increment image buffer offset by one column (minus 7 pixels) */ - bufferIndex += gfx.bufferOffset; - } - - /* increment pixel position */ - xpos += xoffset; - ypos += yoffset; + pixel_out = 0; + if (xoffset+(1U<<10) <= 1U<<11 && yoffset+(1U<<10) <= 1U<<11) { + /* upscaling >= 2x, test for duplicate 
pixels to avoid recalculation */ + uint32 oldx, oldy; + oldx = oldy = ~xpos; + RENDER_LOOP(1, oldx = xpos;oldy = ypos, (oldx^xpos ^ oldy^ypos) >> 11, (!priority) | pixel_out); + } else { + RENDER_LOOP(3, , 1, (!priority) | pixel_out); } } -void gfx_start(unsigned int base) +void gfx_start(uint32 base) { /* make sure 2M mode is enabled */ if (!(Pico_mcd->s68k_regs[3] & 0x04)) { - uint32 mask; + uint32 mask = 0; uint32 reg; /* trace vector pointer */ @@ -327,28 +356,32 @@ void gfx_start(unsigned int base) { case 0: gfx.dotMask = 0x07ffff; /* 256x256 dots/map */ - gfx.stampShift = 11 + 4; /* 16x16 dots/stamps */ + gfx.stampMask = 0x7ff; /* 16x16 dots/stamp */ + gfx.stampShift = 11 + 4; /* 16x16 dots/stamp */ gfx.mapShift = 4; /* 16x16 stamps/map */ mask = 0x3fe00; /* 512 bytes/table */ break; case 1: gfx.dotMask = 0x07ffff; /* 256x256 dots/map */ - gfx.stampShift = 11 + 5; /* 32x32 dots/stamps */ + gfx.stampMask = 0x7fc; /* 32x32 dots/stamp (low 2 bits of stamp number masked off) */ + gfx.stampShift = 11 + 5; /* 32x32 dots/stamp */ gfx.mapShift = 3; /* 8x8 stamps/map */ mask = 0x3ff80; /* 128 bytes/table */ break; case 2: gfx.dotMask = 0x7fffff; /* 4096*4096 dots/map */ - gfx.stampShift = 11 + 4; /* 16x16 dots/stamps */ + gfx.stampMask = 0x7ff; /* 16x16 dots/stamp */ + gfx.stampShift = 11 + 4; /* 16x16 dots/stamp */ gfx.mapShift = 8; /* 256x256 stamps/map */ mask = 0x20000; /* 131072 bytes/table */ break; case 3: gfx.dotMask = 0x7fffff; /* 4096*4096 dots/map */ - gfx.stampShift = 11 + 5; /* 32x32 dots/stamps */ + gfx.stampMask = 0x7fc; /* 32x32 dots/stamp (low 2 bits of stamp number masked off) */ + gfx.stampShift = 11 + 5; /* 32x32 dots/stamp */ gfx.mapShift = 7; /* 128x128 stamps/map */ mask = 0x38000; /* 32768 bytes/table */ break; @@ -376,6 +409,8 @@ void gfx_start(unsigned int base) /* start graphics operation */ Pico_mcd->s68k_regs[0x58] = 0x80; + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_POLL; + Pico_mcd->m.s68k_poll_cnt = 0; gfx_schedule(); } @@ -393,10 +428,9 @@ static void gfx_schedule(void) h = (Pico_mcd->s68k_regs[0x64] << 8) | 
Pico_mcd->s68k_regs[0x65]; cycles = 5 * w * h; + y_step = h; if (cycles > UPDATE_CYCLES) y_step = (UPDATE_CYCLES + 5 * w - 1) / (5 * w); - else - y_step = h; gfx.y_step = y_step; pcd_event_schedule_s68k(PCD_EVENT_GFX, 5 * w * y_step); @@ -419,9 +453,11 @@ void gfx_update(unsigned int cycles) Pico_mcd->s68k_regs[0x64] = Pico_mcd->s68k_regs[0x65] = 0; + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_POLL; + Pico_mcd->m.s68k_poll_cnt = 0; if (Pico_mcd->s68k_regs[0x33] & PCDS_IEN1) { elprintf(EL_INTS|EL_CD, "s68k: gfx_cd irq 1"); - SekInterruptS68k(1); + pcd_irq_s68k(1, 1); } } else { @@ -434,7 +470,7 @@ void gfx_update(unsigned int cycles) pcd_event_schedule(cycles, PCD_EVENT_GFX, 5 * w * lines); } - if (PicoOpt & POPT_EN_MCD_GFX) + if (PicoIn.opt & POPT_EN_MCD_GFX) { /* render lines */ while (lines--) diff --git a/pico/cd/gfx_dma.c b/pico/cd/gfx_dma.c index 44fa7ab2..360afedf 100644 --- a/pico/cd/gfx_dma.c +++ b/pico/cd/gfx_dma.c @@ -10,15 +10,11 @@ #include "cell_map.c" -#ifndef UTYPES_DEFINED -typedef unsigned short u16; -#endif - // check: Heart of the alien, jaguar xj 220 -PICO_INTERNAL void DmaSlowCell(unsigned int source, unsigned int a, int len, unsigned char inc) +PICO_INTERNAL void DmaSlowCell(u32 source, u32 a, int len, unsigned char inc) { unsigned char *base; - unsigned int asrc, a2; + u32 asrc, a2; u16 *r; base = Pico_mcd->word_ram1M[Pico_mcd->s68k_regs[3]&1]; @@ -26,23 +22,22 @@ PICO_INTERNAL void DmaSlowCell(unsigned int source, unsigned int a, int len, uns switch (Pico.video.type) { case 1: // vram - r = Pico.vram; + r = PicoMem.vram; for(; len; len--) { asrc = cell_map(source >> 2) << 2; asrc |= source & 2; // if(a&1) d=(d<<8)|(d>>8); // ?? 
- r[a>>1] = *(u16 *)(base + asrc); + VideoWriteVRAM(a, *(u16 *)(base + asrc)); source += 2; // AutoIncrement a=(u16)(a+inc); } - rendstatus |= PDRAW_SPRITES_MOVED; break; case 3: // cram Pico.m.dirtyPal = 1; - r = Pico.cram; + r = PicoMem.cram; for(a2=a&0x7f; len; len--) { asrc = cell_map(source >> 2) << 2; @@ -58,7 +53,7 @@ PICO_INTERNAL void DmaSlowCell(unsigned int source, unsigned int a, int len, uns break; case 5: // vsram[a&0x003f]=d; - r = Pico.vsram; + r = PicoMem.vsram; for(a2=a&0x7f; len; len--) { asrc = cell_map(source >> 2) << 2; diff --git a/pico/cd/libchdr b/pico/cd/libchdr new file mode 160000 index 00000000..e62ac599 --- /dev/null +++ b/pico/cd/libchdr @@ -0,0 +1 @@ +Subproject commit e62ac5995b1c7ef65ece35293914843b8ee57d49 diff --git a/pico/cd/mcd.c b/pico/cd/mcd.c index 8451a1c2..ca94aab9 100644 --- a/pico/cd/mcd.c +++ b/pico/cd/mcd.c @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2007,2013 + * (C) irixxxx, 2019-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
@@ -11,29 +12,35 @@ extern unsigned char formatted_bram[4*0x10]; -static unsigned int m68k_cycle_mult; - -void (*PicoMCDopenTray)(void) = NULL; -void (*PicoMCDcloseTray)(void) = NULL; +static unsigned int mcd_m68k_cycle_mult; +static unsigned int mcd_s68k_cycle_mult; +static unsigned int mcd_m68k_cycle_base; +static unsigned int mcd_s68k_cycle_base; +mcd_state *Pico_mcd; PICO_INTERNAL void PicoInitMCD(void) { SekInitS68k(); - Init_CD_Driver(); - gfx_init(); } PICO_INTERNAL void PicoExitMCD(void) { - End_CD_Driver(); + cdd_unload(); + if (Pico_mcd) { + plat_munmap(Pico_mcd, sizeof(mcd_state)); + Pico_mcd = NULL; + } } PICO_INTERNAL void PicoPowerMCD(void) { + int fmt_size; + + SekResetS68k(); SekCycleCntS68k = SekCycleAimS68k = 0; - int fmt_size = sizeof(formatted_bram); + fmt_size = sizeof(formatted_bram); memset(Pico_mcd->prg_ram, 0, sizeof(Pico_mcd->prg_ram)); memset(Pico_mcd->word_ram2M, 0, sizeof(Pico_mcd->word_ram2M)); memset(Pico_mcd->pcm_ram, 0, sizeof(Pico_mcd->pcm_ram)); @@ -44,24 +51,31 @@ PICO_INTERNAL void PicoPowerMCD(void) memset(&Pico_mcd->pcm, 0, sizeof(Pico_mcd->pcm)); memset(&Pico_mcd->m, 0, sizeof(Pico_mcd->m)); - Reset_CD(); + cdc_init(); + gfx_init(); // cold reset state (tested) Pico_mcd->m.state_flags = PCD_ST_S68K_RST; Pico_mcd->m.busreq = 2; // busreq on, s68k in reset Pico_mcd->s68k_regs[3] = 1; // 2M word RAM mode, m68k access - memset(Pico_mcd->bios + 0x70, 0xff, 4); + if (Pico.romsize <= 0x20000) // hack to detect BIOS, no GA HINT vector for MSU + memset(Pico.rom + 0x70, 0xff, 4); } void pcd_soft_reset(void) { - // Reset_CD(); // breaks Fahrenheit CD swap + elprintf(EL_CD, "cd: soft reset"); - LC89510_Reset(); + Pico_mcd->m.s68k_pend_ints = 0; + cdc_reset(); + cdd_reset(); #ifdef _ASM_CD_MEMORY_C //PicoMemResetCDdecode(1); // don't have to call this in 2M mode #endif + memset(&Pico_mcd->s68k_regs[0x38], 0, 9); + Pico_mcd->s68k_regs[0x38+9] = 0x0f; // default checksum + pcd_event_schedule_s68k(PCD_EVENT_CDC, 12500000/75); // TODO: test 
if register state/timers change @@ -71,21 +85,21 @@ PICO_INTERNAL int PicoResetMCD(void) { // reset button doesn't affect MCD hardware - // use SRam.data for RAM cart - if (PicoOpt & POPT_EN_MCD_RAMCART) { - if (SRam.data == NULL) - SRam.data = calloc(1, 0x12000); + // use Pico.sv.data for RAM cart + if (PicoIn.opt & POPT_EN_MCD_RAMCART) { + if (Pico.sv.data == NULL) + Pico.sv.data = calloc(1, 0x12000); } - else if (SRam.data != NULL) { - free(SRam.data); - SRam.data = NULL; + else if (Pico.sv.data != NULL) { + free(Pico.sv.data); + Pico.sv.data = NULL; } - SRam.start = SRam.end = 0; // unused + Pico.sv.start = Pico.sv.end = 0; // unused return 0; } -static __inline void SekRunS68k(unsigned int to) +static void SekRunS68k(unsigned int to) { int cyc_do; @@ -93,9 +107,7 @@ static __inline void SekRunS68k(unsigned int to) if ((cyc_do = SekCycleAimS68k - SekCycleCntS68k) <= 0) return; - if (SekShouldInterrupt()) - Pico_mcd->m.s68k_poll_a = 0; - + pprof_start(s68k); SekCycleCntS68k += cyc_do; #if defined(EMU_C68K) PicoCpuCS68k.cycles = cyc_do; @@ -106,31 +118,44 @@ static __inline void SekRunS68k(unsigned int to) SekCycleCntS68k += m68k_execute(cyc_do) - cyc_do; m68k_set_context(&PicoCpuMM68k); #elif defined(EMU_F68K) - g_m68kcontext = &PicoCpuFS68k; - SekCycleCntS68k += fm68k_emulate(cyc_do, 0) - cyc_do; - g_m68kcontext = &PicoCpuFM68k; + SekCycleCntS68k += fm68k_emulate(&PicoCpuFS68k, cyc_do, 0) - cyc_do; #endif + SekCyclesLeftS68k = 0; + pprof_end(s68k); } -static void pcd_set_cycle_mult(void) +void PicoMCDPrepare(void) { // ~1.63 for NTSC, ~1.645 for PAL - if (Pico.m.pal) - m68k_cycle_mult = ((12500000ull << 16) / (50*312*488)); - else - m68k_cycle_mult = ((12500000ull << 16) / (60*262*488)) + 1; +#define DIV_ROUND(x,y) ((x)+(y)/2) / (y) // round to nearest, x/y+0.5 -> (x+y/2)/y + unsigned int osc = (Pico.m.pal ? 
OSC_PAL : OSC_NTSC); + mcd_m68k_cycle_mult = DIV_ROUND(12500000ull << 16, osc / 7); + mcd_s68k_cycle_mult = DIV_ROUND(1ull * osc << 16, 7 * 12500000); } unsigned int pcd_cycles_m68k_to_s68k(unsigned int c) { - return (long long)c * m68k_cycle_mult >> 16; + return (long long)c * mcd_m68k_cycle_mult >> 16; } /* events */ static void pcd_cdc_event(unsigned int now) { // 75Hz CDC update - Check_CD_Command(); + cdd_update(); + + /* check if a new CDD command has been processed */ + if (!(Pico_mcd->s68k_regs[0x4b] & 0xf0)) + { + /* reset CDD command wait flag */ + Pico_mcd->s68k_regs[0x4b] = 0xf0; + + if ((Pico_mcd->s68k_regs[0x33] & PCDS_IEN4) && (Pico_mcd->s68k_regs[0x37] & 4)) { + elprintf(EL_INTS|EL_CD, "s68k: cdd irq 4"); + pcd_irq_s68k(4, 1); + } + } + pcd_event_schedule(now, PCD_EVENT_CDC, 12500000/75); } @@ -138,18 +163,17 @@ static void pcd_int3_timer_event(unsigned int now) { if (Pico_mcd->s68k_regs[0x33] & PCDS_IEN3) { elprintf(EL_INTS|EL_CD, "s68k: timer irq 3"); - SekInterruptS68k(3); + pcd_irq_s68k(3, 1); } if (Pico_mcd->s68k_regs[0x31] != 0) pcd_event_schedule(now, PCD_EVENT_TIMER3, - Pico_mcd->s68k_regs[0x31] * 384); + (Pico_mcd->s68k_regs[0x31]+1) * 384); } static void pcd_dma_event(unsigned int now) { - int ddx = Pico_mcd->s68k_regs[4] & 7; - Update_CDC_TRansfer(ddx); + cdc_dma_update(); } typedef void (event_cb)(unsigned int now); @@ -158,23 +182,23 @@ typedef void (event_cb)(unsigned int now); unsigned int pcd_event_times[PCD_EVENT_COUNT]; static unsigned int event_time_next; static event_cb *pcd_event_cbs[PCD_EVENT_COUNT] = { - [PCD_EVENT_CDC] = pcd_cdc_event, - [PCD_EVENT_TIMER3] = pcd_int3_timer_event, - [PCD_EVENT_GFX] = gfx_update, - [PCD_EVENT_DMA] = pcd_dma_event, + pcd_cdc_event, // PCD_EVENT_CDC + pcd_int3_timer_event, // PCD_EVENT_TIMER3 + gfx_update, // PCD_EVENT_GFX + pcd_dma_event, // PCD_EVENT_DMA }; void pcd_event_schedule(unsigned int now, enum pcd_event event, int after) { unsigned int when; - when = now + after; - if (when == 0) { + 
if ((now|after) == 0) { // event cancelled pcd_event_times[event] = 0; return; } + when = now + after; when |= 1; elprintf(EL_CD, "cd: new event #%u %u->%u", event, now, when); @@ -186,8 +210,7 @@ void pcd_event_schedule(unsigned int now, enum pcd_event event, int after) void pcd_event_schedule_s68k(enum pcd_event event, int after) { - if (SekCyclesLeftS68k > after) - SekEndRunS68k(after); + SekEndRunS68k(after); pcd_event_schedule(SekCyclesDoneS68k(), event, after); } @@ -231,19 +254,32 @@ static void pcd_run_events(unsigned int until) oldest, event_time_next); } +void pcd_irq_s68k(int irq, int state) +{ + if (state) { + SekInterruptS68k(irq); + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_POLL; + Pico_mcd->m.s68k_poll_cnt = 0; + } else + SekInterruptClearS68k(irq); +} + int pcd_sync_s68k(unsigned int m68k_target, int m68k_poll_sync) { #define now SekCycleCntS68k - unsigned int s68k_target = - (unsigned long long)m68k_target * m68k_cycle_mult >> 16; + unsigned int s68k_target; unsigned int target; + target = m68k_target - mcd_m68k_cycle_base; + s68k_target = mcd_s68k_cycle_base + + ((unsigned long long)target * mcd_m68k_cycle_mult >> 16); + elprintf(EL_CD, "s68k sync to %u, %u->%u", m68k_target, now, s68k_target); if (Pico_mcd->m.busreq != 1) { /* busreq/reset */ SekCycleCntS68k = SekCycleAimS68k = s68k_target; - pcd_run_events(m68k_target); + pcd_run_events(s68k_target); return 0; } @@ -255,7 +291,11 @@ int pcd_sync_s68k(unsigned int m68k_target, int m68k_poll_sync) if (event_time_next && CYCLES_GT(target, event_time_next)) target = event_time_next; - SekRunS68k(target); + if (Pico_mcd->m.state_flags & (PCD_ST_S68K_POLL|PCD_ST_S68K_SLEEP)) + SekCycleCntS68k = SekCycleAimS68k = target; + else + SekRunS68k(target); + if (m68k_poll_sync && Pico_mcd->m.m68k_poll_cnt == 0) break; } @@ -267,37 +307,61 @@ int pcd_sync_s68k(unsigned int m68k_target, int m68k_poll_sync) #define pcd_run_cpus_normal pcd_run_cpus //#define pcd_run_cpus_lockstep pcd_run_cpus -static void 
SekSyncM68k(void); +static void SekAimM68k(int cyc, int mult); +static int SekSyncM68k(int once); void pcd_run_cpus_normal(int m68k_cycles) { - SekCycleAim += m68k_cycles; - if (SekShouldInterrupt() || Pico_mcd->m.m68k_poll_cnt < 12) - Pico_mcd->m.m68k_poll_cnt = 0; - else if (Pico_mcd->m.m68k_poll_cnt >= 16) { - int s68k_left = pcd_sync_s68k(SekCycleAim, 1); - if (s68k_left <= 0) { + // TODO this is suspicious. ~1 cycle refresh delay every 256 cycles? + SekAimM68k(m68k_cycles, 0x42); // Fhey area + + while (CYCLES_GT(Pico.t.m68c_aim, Pico.t.m68c_cnt)) { + if (SekShouldInterrupt()) { + Pico_mcd->m.state_flags &= ~PCD_ST_M68K_POLL; + Pico_mcd->m.m68k_poll_cnt = 0; + } + +#ifdef USE_POLL_DETECT + if (Pico_mcd->m.state_flags & PCD_ST_M68K_POLL) { + int s68k_left; + // main CPU is polling, (wake and) run sub only + if (Pico_mcd->m.state_flags & (PCD_ST_S68K_POLL|PCD_ST_S68K_SLEEP)) { + Pico_mcd->m.state_flags &= ~(PCD_ST_S68K_POLL|PCD_ST_S68K_SLEEP); + Pico_mcd->m.s68k_poll_cnt = 0; + } + s68k_left = pcd_sync_s68k(Pico.t.m68c_aim, 1); + + Pico.t.m68c_cnt = Pico.t.m68c_aim; + if (s68k_left > 0) + Pico.t.m68c_cnt -= ((long long)s68k_left * mcd_s68k_cycle_mult >> 16); + if (Pico_mcd->m.state_flags & (PCD_ST_S68K_POLL|PCD_ST_S68K_SLEEP)) { + // slave has stopped, wake master to avoid lockups + Pico_mcd->m.state_flags &= ~PCD_ST_M68K_POLL; + Pico_mcd->m.m68k_poll_cnt = 0; + } + elprintf(EL_CDPOLL, "m68k poll [%02x] x%d @%06x", Pico_mcd->m.m68k_poll_a, Pico_mcd->m.m68k_poll_cnt, SekPc); - SekCycleCnt = SekCycleAim; - return; + } else +#endif + SekSyncM68k(1); + if (Pico_mcd->m.state_flags & PCD_ST_S68K_SYNC) { + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_SYNC; + pcd_sync_s68k(Pico.t.m68c_cnt, 0); } - SekCycleCnt = SekCycleAim - (s68k_left * 40220 >> 16); } - - SekSyncM68k(); } void pcd_run_cpus_lockstep(int m68k_cycles) { - unsigned int target = SekCycleAim + m68k_cycles; + unsigned int target = Pico.t.m68c_aim + m68k_cycles; do { - SekCycleAim += 8; - SekSyncM68k(); - 
pcd_sync_s68k(SekCycleAim, 0); - } while (CYCLES_GT(target, SekCycleAim)); + Pico.t.m68c_aim += 8; + SekSyncM68k(0); + pcd_sync_s68k(Pico.t.m68c_aim, 0); + } while (CYCLES_GT(target, Pico.t.m68c_aim)); - SekCycleAim = target; + Pico.t.m68c_aim = target; } #define PICO_CD @@ -307,12 +371,19 @@ void pcd_run_cpus_lockstep(int m68k_cycles) #include "../pico_cmn.c" +void pcd_prepare_frame(void) +{ + // need this because we can't have direct mapping between + // master<->slave cycle counters because of overflows + mcd_m68k_cycle_base = Pico.t.m68c_aim; + mcd_s68k_cycle_base = SekCycleAimS68k; +} + PICO_INTERNAL void PicoFrameMCD(void) { - if (!(PicoOpt&POPT_ALT_RENDERER)) - PicoFrameStart(); + PicoFrameStart(); - pcd_set_cycle_mult(); + pcd_prepare_frame(); PicoFrameHints(); } @@ -321,7 +392,6 @@ void pcd_state_loaded(void) unsigned int cycles; int diff; - pcd_set_cycle_mult(); pcd_state_loaded_mem(); memset(Pico_mcd->pcm_mixbuf, 0, sizeof(Pico_mcd->pcm_mixbuf)); @@ -330,9 +400,8 @@ void pcd_state_loaded(void) Pico_mcd->pcm_regs_dirty = 1; // old savestates.. 
- cycles = pcd_cycles_m68k_to_s68k(SekCycleAim); - diff = cycles - SekCycleAimS68k; - if (diff < -1000 || diff > 1000) { + cycles = pcd_cycles_m68k_to_s68k(Pico.t.m68c_aim); + if (CYCLES_GE(cycles - SekCycleAimS68k, 1000)) { SekCycleCntS68k = SekCycleAimS68k = cycles; } if (pcd_event_times[PCD_EVENT_CDC] == 0) { @@ -341,15 +410,17 @@ void pcd_state_loaded(void) if (Pico_mcd->s68k_regs[0x31]) pcd_event_schedule(SekCycleAimS68k, PCD_EVENT_TIMER3, Pico_mcd->s68k_regs[0x31] * 384); - - if (Pico_mcd->scd.Status_CDC & 0x08) - Update_CDC_TRansfer(Pico_mcd->s68k_regs[4] & 7); } diff = cycles - Pico_mcd->pcm.update_cycles; if ((unsigned int)diff > 12500000/50) Pico_mcd->pcm.update_cycles = cycles; + if (Pico_mcd->m.need_sync) { + Pico_mcd->m.state_flags |= PCD_ST_S68K_SYNC; + Pico_mcd->m.need_sync = 0; + } + // reschedule event_time_next = 0; pcd_run_events(SekCycleCntS68k); diff --git a/pico/cd/mcd_arm.s b/pico/cd/mcd_arm.s deleted file mode 100644 index 9159e28b..00000000 --- a/pico/cd/mcd_arm.s +++ /dev/null @@ -1,185 +0,0 @@ -@* -@* CPU scheduling code -@* (C) notaz, 2007-2008 -@* -@* This work is licensed under the terms of MAME license. -@* See COPYING file in the top-level directory. -@* - -@ SekRunPS runs PicoCpuCM68k and PicoCpuCS68k interleaved in steps of PS_STEP_M68K -@ cycles. This is done without calling CycloneRun and jumping directly to -@ Cyclone code to avoid pushing/popping all the registers every time. - - -.equiv PS_STEP_M68K, ((488<<16)/20) @ ~24 - -@ .extern is ignored by gas, we add these here just to see what we depend on. 
-.extern CycloneJumpTab -.extern CycloneDoInterrupt -.extern PicoCpuCM68k -.extern PicoCpuCS68k -.extern SekCycleAim -.extern SekCycleCnt -.extern SekCycleAimS68k -.extern SekCycleCntS68k - - -.text -.align 4 - - -.global SekRunPS @ cyc_m68k, cyc_s68k - -SekRunPS: - stmfd sp!, {r4-r8,r10,r11,lr} - sub sp, sp, #2*4 @ sp[0] = main_cycle_cnt, sp[4] = run_cycle_cnt - - @ override CycloneEnd for both contexts - ldr r7, =PicoCpuCM68k - ldr lr, =PicoCpuCS68k - ldr r2, =CycloneEnd_M68k - ldr r3, =CycloneEnd_S68k - str r2, [r7,#0x98] - str r3, [lr,#0x98] - - @ update aims - ldr r8, =SekCycleAim - ldr r10,=SekCycleAimS68k - ldr r2, [r8] - ldr r3, [r10] - add r2, r2, r0 - add r3, r3, r1 - str r2, [r8] - str r3, [r10] - - ldr r6, =CycloneJumpTab - ldr r1, =SekCycleCnt - ldr r0, =((488<<16)-PS_STEP_M68K) - str r6, [r7,#0x54] - str r6, [lr,#0x54] @ make copies to avoid literal pools - - @ schedule m68k for the first time.. - ldr r1, [r1] - str r0, [sp] @ main target 'left cycle' counter - sub r1, r2, r1 - subs r5, r1, r0, asr #16 - ble schedule_s68k @ m68k has not enough cycles - - str r5, [sp,#4] @ run_cycle_cnt - b CycloneRunLocal - - - -CycloneEnd_M68k: - ldr r3, =SekCycleCnt - ldr r0, [sp,#4] @ run_cycle_cnt - ldr r1, [r3] - str r4, [r7,#0x40] ;@ Save Current PC + Memory Base - strb r10,[r7,#0x46] ;@ Save Flags (NZCV) - sub r0, r0, r5 @ subtract leftover cycles (which should be negative) - add r0, r0, r1 - str r0, [r3] - -schedule_s68k: - ldr r8, =SekCycleCntS68k - ldr r10,=SekCycleAimS68k - ldr r3, [sp] - ldr r8, [r8] - ldr r10,[r10] - - sub r0, r10, r8 - mov r2, r3 - add r3, r3, r2, asr #1 - add r3, r3, r2, asr #3 @ cycn_s68k = (cycn + cycn/2 + cycn/8) - - subs r5, r0, r3, asr #16 - ble schedule_m68k @ s68k has not enough cycles - - ldr r7, =PicoCpuCS68k - str r5, [sp,#4] @ run_cycle_cnt - b CycloneRunLocal - - - -CycloneEnd_S68k: - ldr r3, =SekCycleCntS68k - ldr r0, [sp,#4] @ run_cycle_cnt - ldr r1, [r3] - str r4, [r7,#0x40] ;@ Save Current PC + Memory Base - strb 
r10,[r7,#0x46] ;@ Save Flags (NZCV) - sub r0, r0, r5 @ subtract leftover cycles (should be negative) - add r0, r0, r1 - str r0, [r3] - -schedule_m68k: - ldr r1, =PS_STEP_M68K - ldr r3, [sp] @ main_cycle_cnt - ldr r8, =SekCycleCnt - ldr r10,=SekCycleAim - subs r3, r3, r1 - bmi SekRunPS_end - - ldr r8, [r8] - ldr r10,[r10] - str r3, [sp] @ update main_cycle_cnt - sub r0, r10, r8 - - subs r5, r0, r3, asr #16 - ble schedule_s68k @ m68k has not enough cycles - - ldr r7, =PicoCpuCM68k - str r5, [sp,#4] @ run_cycle_cnt - b CycloneRunLocal - - - -SekRunPS_end: - ldr r7, =PicoCpuCM68k - ldr lr, =PicoCpuCS68k - mov r0, #0 - str r0, [r7,#0x98] @ remove CycloneEnd handler - str r0, [lr,#0x98] - @ return - add sp, sp, #2*4 - ldmfd sp!, {r4-r8,r10,r11,pc} - - - -CycloneRunLocal: - ;@ r0-3 = Temporary registers - ldr r4,[r7,#0x40] ;@ r4 = Current PC + Memory Base - ;@ r5 = Cycles - ;@ r6 = Opcode Jump table - ;@ r7 = Pointer to Cpu Context - ;@ r8 = Current Opcode - ldrb r10,[r7,#0x46];@ r10 = Flags (NZCV) - ldr r1,[r7,#0x44] ;@ get SR high and IRQ level - orr r10,r10,r10,lsl #28 ;@ r10 = Flags 0xf0000000, cpsr format - -;@ CheckInterrupt: - movs r0,r1,lsr #24 ;@ Get IRQ level - beq NoIntsLocal - cmp r0,#6 ;@ irq>6 ? - andle r1,r1,#7 ;@ Get interrupt mask - cmple r0,r1 ;@ irq<=6: Is irq<=mask ? - bgt CycloneDoInterrupt -NoIntsLocal: - -;@ Check if our processor is in special state -;@ and jump to opcode handler if not - ldr r0,[r7,#0x58] ;@ state_flags - ldrh r8,[r4],#2 ;@ Fetch first opcode - tst r0,#0x03 ;@ special state? - andeq r10,r10,#0xf0000000 - ldreq pc,[r6,r8,asl #2] ;@ Jump to opcode handler - -CycloneSpecial2: - tst r0,#2 ;@ tracing? - bne CycloneDoTrace -;@ stopped or halted - sub r4,r4,#2 - ldr r1,[r7,#0x98] - mov r5,#0 - bx r1 - -@ vim:filetype=armasm diff --git a/pico/cd/memory.c b/pico/cd/memory.c index b0f5e4a8..99c6610b 100644 --- a/pico/cd/memory.c +++ b/pico/cd/memory.c @@ -1,6 +1,7 @@ /* * Memory I/O handlers for Sega/Mega CD. 
* (C) notaz, 2007-2009 + * (C) irixxxx, 2019-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -14,12 +15,17 @@ uptr s68k_read16_map [0x1000000 >> M68K_MEM_SHIFT]; uptr s68k_write8_map [0x1000000 >> M68K_MEM_SHIFT]; uptr s68k_write16_map[0x1000000 >> M68K_MEM_SHIFT]; +#ifndef _ASM_CD_MEMORY_C MAKE_68K_READ8(s68k_read8, s68k_read8_map) MAKE_68K_READ16(s68k_read16, s68k_read16_map) MAKE_68K_READ32(s68k_read32, s68k_read16_map) MAKE_68K_WRITE8(s68k_write8, s68k_write8_map) MAKE_68K_WRITE16(s68k_write16, s68k_write16_map) MAKE_68K_WRITE32(s68k_write32, s68k_write16_map) +#endif + +u32 pcd_base_address; +#define BASE pcd_base_address // ----------------------------------------------------------------- @@ -66,14 +72,36 @@ static void remap_word_ram(u32 r3); void m68k_comm_check(u32 a) { - pcd_sync_s68k(SekCyclesDone(), 0); - if (SekNotPolling || a != Pico_mcd->m.m68k_poll_a) { + u32 cycles = SekCyclesDone(); + u32 clkdiff = cycles - Pico_mcd->m.m68k_poll_clk; + pcd_sync_s68k(cycles, 0); + if (a == 0x0e && !(Pico_mcd->m.state_flags & PCD_ST_S68K_SYNC) && (Pico_mcd->s68k_regs[3]&0x4)) { + // there are cases when slave updates comm and only switches RAM + // over after that (mcd1 bios), so there must be a resync.. + SekEndRun(64); + Pico_mcd->m.state_flags |= PCD_ST_S68K_SYNC; + } + Pico_mcd->m.m68k_poll_clk = cycles; + if (SekNotPolling || a != Pico_mcd->m.m68k_poll_a || clkdiff > POLL_CYCLES || clkdiff <= 16) { Pico_mcd->m.m68k_poll_a = a; Pico_mcd->m.m68k_poll_cnt = 0; SekNotPolling = 0; return; } Pico_mcd->m.m68k_poll_cnt++; + Pico_mcd->m.state_flags &= ~PCD_ST_M68K_POLL; + if (Pico_mcd->m.m68k_poll_cnt >= POLL_LIMIT) { + Pico_mcd->m.state_flags |= PCD_ST_M68K_POLL; + SekEndRun(8); + } +} + +u32 pcd_stopwatch_read(int sub) +{ + // ugh... stopwatch runs 384 cycles per step, divide by mult with inverse + u32 d = sub ? 
SekCyclesDoneS68k() : pcd_cycles_m68k_to_s68k(SekCyclesDone()); + d = ((d - Pico_mcd->m.stopwatch_base_c) * ((1LL << 32) / 384)) >> 32; + return d & 0x0fff; } #ifndef _ASM_CD_MEMORY_C @@ -84,9 +112,9 @@ static u32 m68k_reg_read16(u32 a) switch (a) { case 0: - // here IFL2 is always 0, just like in Gens - d = ((Pico_mcd->s68k_regs[0x33] << 13) & 0x8000) - | Pico_mcd->m.busreq; + pcd_sync_s68k(SekCyclesDone(), 0); + d = ((Pico_mcd->s68k_regs[0x33] & PCDS_IEN2) << 13) | + (Pico_mcd->m.state_flags & PCD_ST_S68K_IFL2) | Pico_mcd->m.busreq; goto end; case 2: m68k_comm_check(a); @@ -97,19 +125,16 @@ static u32 m68k_reg_read16(u32 a) d = Pico_mcd->s68k_regs[4]<<8; goto end; case 6: - d = *(u16 *)(Pico_mcd->bios + 0x72); + d = *(u16 *)(Pico.rom + 0x72); goto end; case 8: - d = Read_CDC_Host(0); + d = cdc_host_r(); goto end; - case 0xA: + case 0xa: elprintf(EL_UIO, "m68k FIXME: reserved read"); goto end; - case 0xC: // 384 cycle stopwatch timer - // ugh.. - d = pcd_cycles_m68k_to_s68k(SekCyclesDone()); - d = (d - Pico_mcd->m.stopwatch_base_c) / 384; - d &= 0x0fff; + case 0xc: // 384 cycle stopwatch timer + d = pcd_stopwatch_read(0); elprintf(EL_CDREGS, "m68k stopwatch timer read (%04x)", d); goto end; } @@ -118,7 +143,7 @@ static u32 m68k_reg_read16(u32 a) // comm flag/cmd/status (0xE-0x2F) m68k_comm_check(a); d = (Pico_mcd->s68k_regs[a]<<8) | Pico_mcd->s68k_regs[a+1]; - goto end; + return d; } elprintf(EL_UIO, "m68k_regs FIXME invalid read @ %02x", a); @@ -136,20 +161,27 @@ void m68k_reg_write8(u32 a, u32 d) u32 dold; a &= 0x3f; + Pico_mcd->m.state_flags &= ~PCD_ST_M68K_POLL; + Pico_mcd->m.m68k_poll_cnt = 0; + switch (a) { case 0: d &= 1; + pcd_sync_s68k(SekCyclesDone(), 0); if (d && (Pico_mcd->s68k_regs[0x33] & PCDS_IEN2)) { elprintf(EL_INTS, "m68k: s68k irq 2"); - pcd_sync_s68k(SekCyclesDone(), 0); - SekInterruptS68k(2); + Pico_mcd->m.state_flags |= PCD_ST_S68K_IFL2; + pcd_irq_s68k(2, 1); + } else { + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_IFL2; + pcd_irq_s68k(2, 0); } 
return; case 1: d &= 3; dold = Pico_mcd->m.busreq; - if (!(d & 1)) - d |= 2; // verified: can't release bus on reset +// if (!(d & 1)) +// d |= 2; // verified: can't release bus on reset if (dold == d) return; @@ -160,11 +192,12 @@ void m68k_reg_write8(u32 a, u32 d) if (!(d & 1)) Pico_mcd->m.state_flags |= PCD_ST_S68K_RST; else if (d == 1 && (Pico_mcd->m.state_flags & PCD_ST_S68K_RST)) { - Pico_mcd->m.state_flags &= ~PCD_ST_S68K_RST; + Pico_mcd->m.state_flags &= ~(PCD_ST_S68K_RST|PCD_ST_S68K_POLL|PCD_ST_S68K_SLEEP); elprintf(EL_CDREGS, "m68k: resetting s68k"); SekResetS68k(); + SekCycleCntS68k += 40; } - if ((dold ^ d) & 2) { + if (((dold & 3) == 1) != ((d & 3) == 1)) { elprintf(EL_INTSW, "m68k: s68k brq %i", d >> 1); remap_prg_window(d, Pico_mcd->s68k_regs[3]); } @@ -172,11 +205,10 @@ void m68k_reg_write8(u32 a, u32 d) return; case 2: elprintf(EL_CDREGS, "m68k: prg wp=%02x", d); - Pico_mcd->s68k_regs[2] = d; // really use s68k side register - return; + goto write_comm; case 3: - dold = Pico_mcd->s68k_regs[3]; elprintf(EL_CDREG3, "m68k_regs w3: %02x @%06x", (u8)d, SekPc); + dold = Pico_mcd->s68k_regs[3]; if ((d ^ dold) & 0xc0) { elprintf(EL_CDREGS, "m68k: prg bank: %i -> %i", (Pico_mcd->s68k_regs[a]>>6), ((d>>6)&3)); @@ -194,15 +226,19 @@ void m68k_reg_write8(u32 a, u32 d) } else d = (d & 0xc0) | (dold & 0x1c) | Pico_mcd->m.dmna_ret_2m; - + if ((dold ^ d) & 0x1f) + remap_word_ram(d); goto write_comm; case 6: - Pico_mcd->bios[0x72 + 1] = d; // simple hint vector changer + Pico.rom[MEM_BE2(0x72)] = d; // simple hint vector changer return; case 7: - Pico_mcd->bios[0x72] = d; + Pico.rom[MEM_BE2(0x73)] = d; elprintf(EL_CDREGS, "hint vector set to %04x%04x", - ((u16 *)Pico_mcd->bios)[0x70/2], ((u16 *)Pico_mcd->bios)[0x72/2]); + ((u16 *)Pico.rom)[0x70/2], ((u16 *)Pico.rom)[0x72/2]); + return; + case 8: + (void) cdc_host_r(); // acts same as reading return; case 0x0f: a = 0x0e; @@ -217,18 +253,23 @@ void m68k_reg_write8(u32 a, u32 d) return; write_comm: - if (d == 
Pico_mcd->s68k_regs[a]) + if (Pico_mcd->s68k_regs[a] == (u8)d) return; pcd_sync_s68k(SekCyclesDone(), 0); Pico_mcd->s68k_regs[a] = d; - if (Pico_mcd->m.s68k_poll_a == (a & ~1)) - { - if (Pico_mcd->m.s68k_poll_cnt > POLL_LIMIT) { + if (a == 0x03) { + // There are cases when master checks for successful switching of RAM to + // slave. This can produce race conditions where slave switches RAM back to + // master while master is delayed by interrupt before the check executes. + // Delay slave a bit to make sure master can check before slave changes. + SekCycleCntS68k += 24; // Silpheed + } + if (!((Pico_mcd->m.s68k_poll_a ^ a) & ~1)) { + if (Pico_mcd->m.state_flags & PCD_ST_S68K_POLL) elprintf(EL_CDPOLL, "s68k poll release, a=%02x", a); - SekSetStopS68k(0); - } - Pico_mcd->m.s68k_poll_a = 0; + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_POLL; + Pico_mcd->m.s68k_poll_cnt = 0; } } @@ -236,20 +277,23 @@ u32 s68k_poll_detect(u32 a, u32 d) { #ifdef USE_POLL_DETECT u32 cycles, cnt = 0; - if (SekIsStoppedS68k()) + if (Pico_mcd->m.state_flags & (PCD_ST_S68K_POLL|PCD_ST_S68K_SLEEP)) return d; cycles = SekCyclesDoneS68k(); - if (!SekNotPolling && a == Pico_mcd->m.s68k_poll_a) { + if (!SekNotPollingS68k && a == Pico_mcd->m.s68k_poll_a) { u32 clkdiff = cycles - Pico_mcd->m.s68k_poll_clk; if (clkdiff <= POLL_CYCLES) { cnt = Pico_mcd->m.s68k_poll_cnt + 1; //printf("-- diff: %u, cnt = %i\n", clkdiff, cnt); - if (Pico_mcd->m.s68k_poll_cnt > POLL_LIMIT) { - SekSetStopS68k(1); + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_POLL; + if (cnt > POLL_LIMIT) { + Pico_mcd->m.state_flags |= PCD_ST_S68K_POLL; + SekEndRunS68k(8); elprintf(EL_CDPOLL, "s68k poll detected @%06x, a=%02x", SekPcS68k, a); - } + } else if (cnt > 2) + SekEndRunS68k(240); } } Pico_mcd->m.s68k_poll_a = a; @@ -262,7 +306,7 @@ u32 s68k_poll_detect(u32 a, u32 d) #define READ_FONT_DATA(basemask) \ { \ - unsigned int fnt = *(unsigned int *)(Pico_mcd->s68k_regs + 0x4c); \ + unsigned int fnt = CPU_LE4(*(u32 *)(Pico_mcd->s68k_regs + 
0x4c)); \ unsigned int col0 = (fnt >> 8) & 0x0f, col1 = (fnt >> 12) & 0x0f; \ if (fnt & (basemask << 0)) d = col1 ; else d = col0; \ if (fnt & (basemask << 1)) d |= col1 << 4; else d |= col0 << 4; \ @@ -280,45 +324,53 @@ u32 s68k_reg_read16(u32 a) switch (a) { case 0: - return ((Pico_mcd->s68k_regs[0]&3)<<8) | 1; // ver = 0, not in reset state + d = ((Pico_mcd->s68k_regs[0]&3)<<8) | 1; // ver = 0, not in reset state + goto end; case 2: d = (Pico_mcd->s68k_regs[2]<<8) | (Pico_mcd->s68k_regs[3]&0x1f); elprintf(EL_CDREG3, "s68k_regs r3: %02x @%06x", (u8)d, SekPcS68k); - return s68k_poll_detect(a, d); + s68k_poll_detect(a, d); + goto end; + case 4: + d = (Pico_mcd->s68k_regs[4]<<8) | (Pico_mcd->s68k_regs[5]&0x1f); + goto end; case 6: - return CDC_Read_Reg(); + d = cdc_reg_r(); + goto end; case 8: - return Read_CDC_Host(1); // Gens returns 0 here on byte reads - case 0xC: - d = SekCyclesDoneS68k() - Pico_mcd->m.stopwatch_base_c; - d /= 384; - d &= 0x0fff; + d = cdc_host_r(); + goto end; + case 0xc: + d = pcd_stopwatch_read(1); elprintf(EL_CDREGS, "s68k stopwatch timer read (%04x)", d); - return d; + goto end; case 0x30: - elprintf(EL_CDREGS, "s68k int3 timer read (%02x)", Pico_mcd->s68k_regs[31]); - return Pico_mcd->s68k_regs[31]; + elprintf(EL_CDREGS, "s68k int3 timer read (%02x)", Pico_mcd->s68k_regs[0x31]); + d = Pico_mcd->s68k_regs[0x31]; + goto end; case 0x34: // fader - return 0; // no busy bit + d = 0; // no busy bit + goto end; case 0x50: // font data (check: Lunar 2, Silpheed) READ_FONT_DATA(0x00100000); - return d; + goto end; case 0x52: READ_FONT_DATA(0x00010000); - return d; + goto end; case 0x54: READ_FONT_DATA(0x10000000); - return d; + goto end; case 0x56: READ_FONT_DATA(0x01000000); - return d; + goto end; } d = (Pico_mcd->s68k_regs[a]<<8) | Pico_mcd->s68k_regs[a+1]; - if (a >= 0x0e && a < 0x30) - return s68k_poll_detect(a, d); + if ((a >= 0x0e && a < 0x30) || a == 0x58) + d = s68k_poll_detect(a, d); +end: return d; } @@ -333,8 +385,7 @@ void 
s68k_reg_write8(u32 a, u32 d) if (!(d & 1)) pcd_soft_reset(); return; - case 2: - return; // only m68k can change WP + case 2: a++; // byte access only, ignores LDS/UDS case 3: { int dold = Pico_mcd->s68k_regs[3]; elprintf(EL_CDREG3, "s68k_regs w3: %02x @%06x", (u8)d, SekPcS68k); @@ -354,9 +405,6 @@ void s68k_reg_write8(u32 a, u32 d) wram_2M_to_1M(Pico_mcd->word_ram2M); } - if ((d ^ dold) & 0x1d) - remap_word_ram(d); - if ((d ^ dold) & 0x05) d &= ~2; // clear DMNA - swap complete } @@ -365,51 +413,62 @@ void s68k_reg_write8(u32 a, u32 d) if (dold & 4) { elprintf(EL_CDREG3, "wram mode 1M->2M"); wram_1M_to_2M(Pico_mcd->word_ram2M); - remap_word_ram(d); } d = (d & ~3) | Pico_mcd->m.dmna_ret_2m; } + if ((dold ^ d) & 0x1f) + remap_word_ram(d); goto write_comm; } case 4: elprintf(EL_CDREGS, "s68k CDC dest: %x", d&7); - Pico_mcd->s68k_regs[4] = (Pico_mcd->s68k_regs[4]&0xC0) | (d&7); // CDC mode + Pico_mcd->s68k_regs[a] = (d&7); // CDC mode + Pico_mcd->s68k_regs[0xa] = Pico_mcd->s68k_regs[0xb] = 0; // resets DMA return; case 5: - //dprintf("s68k CDC reg addr: %x", d&0xf); - break; + //dprintf("s68k CDC reg addr: %x", d&0x1f); + Pico_mcd->s68k_regs[a] = (d&0x1f); + return; case 7: - CDC_Write_Reg(d); + cdc_reg_w(d & 0xff); return; case 0xa: + case 0xb: + // word access only. 68k sets both bus halves to value d. elprintf(EL_CDREGS, "s68k set CDC dma addr"); - break; + Pico_mcd->s68k_regs[0xa] = Pico_mcd->s68k_regs[0xb] = d; + return; case 0xc: case 0xd: // 384 cycle stopwatch timer elprintf(EL_CDREGS|EL_CD, "s68k clear stopwatch (%x)", d); // does this also reset internal 384 cycle counter? Pico_mcd->m.stopwatch_base_c = SekCyclesDoneS68k(); return; - case 0x0e: - a = 0x0f; + case 0x0e: a++; case 0x0f: goto write_comm; + case 0x30: a++; case 0x31: // 384 cycle int3 timer d &= 0xff; elprintf(EL_CDREGS|EL_CD, "s68k set int3 timer: %02x", d); Pico_mcd->s68k_regs[a] = (u8) d; - if (d) // d or d+1?? - pcd_event_schedule_s68k(PCD_EVENT_TIMER3, d * 384); + if (d) // XXX: d or d+1? 
mcd-verificator results suggest d+1 + pcd_event_schedule_s68k(PCD_EVENT_TIMER3, (d+1) * 384); else pcd_event_schedule(0, PCD_EVENT_TIMER3, 0); break; case 0x33: // IRQ mask elprintf(EL_CDREGS|EL_CD, "s68k irq mask: %02x", d); d &= 0x7e; - if ((d ^ Pico_mcd->s68k_regs[0x33]) & d & PCDS_IEN4) { - if (Pico_mcd->s68k_regs[0x37] & 4) - CDD_Export_Status(); + if ((d ^ Pico_mcd->s68k_regs[0x33]) & PCDS_IEN4) { + // XXX: emulate pending irq instead? + if ((d & PCDS_IEN4) && (Pico_mcd->s68k_regs[0x37] & 4)) { + elprintf(EL_INTS, "cdd export irq 4 (unmask)"); + pcd_irq_s68k(4, 1); + } } + if ((d ^ Pico_mcd->s68k_regs[0x33]) & ~d & PCDS_IEN2) + pcd_irq_s68k(2, 0); break; case 0x34: // fader Pico_mcd->s68k_regs[a] = (u8) d & 0x7f; @@ -418,16 +477,40 @@ void s68k_reg_write8(u32 a, u32 d) return; // d/m bit is unsetable case 0x37: { u32 d_old = Pico_mcd->s68k_regs[0x37]; - Pico_mcd->s68k_regs[0x37] = d&7; - if ((d&4) && !(d_old&4)) { - CDD_Export_Status(); + Pico_mcd->s68k_regs[0x37] = d & 7; + if ((d ^ d_old) & 4) { + // ?? + if (d & 4) + pcd_event_schedule_s68k(PCD_EVENT_CDC, 12500000/75); + + if ((d & 4) && (Pico_mcd->s68k_regs[0x33] & PCDS_IEN4)) { + elprintf(EL_INTS, "cdd export irq 4"); + pcd_irq_s68k(4, 1); + } } return; } case 0x4b: - Pico_mcd->s68k_regs[a] = (u8) d; - CDD_Import_Command(); + Pico_mcd->s68k_regs[a] = 0; // (u8) d; ? + cdd_process(); + { + static const char *nm[] = + { "stat", "stop", "read_toc", "play", + "seek", "???", "pause", "resume", + "ff", "fr", "tjump", "???", + "close","open", "???", "???" 
}; + u8 *c = &Pico_mcd->s68k_regs[0x42]; + u8 *s = &Pico_mcd->s68k_regs[0x38]; + elprintf(EL_CD, + "CDD command: %02x %02x %02x %02x %02x %02x %02x %02x %12s", + c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7], nm[c[0] & 0x0f]); + elprintf(EL_CD, + "CDD status: %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x", + s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7], s[8], s[9]); + } return; + case 0x4c: a++; + break; case 0x58: return; } @@ -445,24 +528,39 @@ void s68k_reg_write8(u32 a, u32 d) return; write_comm: + if (Pico_mcd->s68k_regs[a] == (u8)d) + return; + Pico_mcd->s68k_regs[a] = (u8) d; - if (Pico_mcd->m.m68k_poll_cnt) - SekEndRunS68k(0); - Pico_mcd->m.m68k_poll_cnt = 0; + if (!((Pico_mcd->m.m68k_poll_a ^ a) & ~1)) { + SekEndRunS68k(8); + Pico_mcd->m.state_flags &= ~PCD_ST_M68K_POLL; + Pico_mcd->m.m68k_poll_cnt = 0; + } } void s68k_reg_write16(u32 a, u32 d) { u8 *r = Pico_mcd->s68k_regs; + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_POLL; + Pico_mcd->m.s68k_poll_cnt = 0; + if ((a & 0x1f0) == 0x20) goto write_comm; switch (a) { + case 0x02: case 0x0e: - // special case, 2 byte writes would be handled differently - // TODO: verify - r[0xf] = d; + case 0x30: + case 0x4c: + // these are only byte registers, LDS/UDS ignored + return s68k_reg_write8(a + 1, d); + case 0x08: + return (void) cdc_host_r(); // acts same as reading + case 0x0a: // DMA address + r[0xa] = d >> 8; + r[0xb] = d; return; case 0x58: // stamp data size r[0x59] = d & 7; @@ -503,11 +601,16 @@ void s68k_reg_write16(u32 a, u32 d) return; write_comm: + if (r[a] == (u8)(d >> 8) && r[a + 1] == (u8)d) + return; + r[a] = d >> 8; r[a + 1] = d; - if (Pico_mcd->m.m68k_poll_cnt) - SekEndRunS68k(0); - Pico_mcd->m.m68k_poll_cnt = 0; + if (!((Pico_mcd->m.m68k_poll_a ^ a) & ~1)) { + SekEndRunS68k(8); + Pico_mcd->m.state_flags &= ~PCD_ST_M68K_POLL; + Pico_mcd->m.m68k_poll_cnt = 0; + } } // ----------------------------------------------------------------- @@ -521,13 +624,13 @@ write_comm: static u32 
PicoReadM68k8_cell0(u32 a) { a = (a&3) | (cell_map(a >> 2) << 2); // cell arranged - return Pico_mcd->word_ram1M[0][a ^ 1]; + return Pico_mcd->word_ram1M[0][MEM_BE2(a)]; } static u32 PicoReadM68k8_cell1(u32 a) { a = (a&3) | (cell_map(a >> 2) << 2); - return Pico_mcd->word_ram1M[1][a ^ 1]; + return Pico_mcd->word_ram1M[1][MEM_BE2(a)]; } static u32 PicoReadM68k16_cell0(u32 a) @@ -545,13 +648,13 @@ static u32 PicoReadM68k16_cell1(u32 a) static void PicoWriteM68k8_cell0(u32 a, u32 d) { a = (a&3) | (cell_map(a >> 2) << 2); - Pico_mcd->word_ram1M[0][a ^ 1] = d; + Pico_mcd->word_ram1M[0][MEM_BE2(a)] = d; } static void PicoWriteM68k8_cell1(u32 a, u32 d) { a = (a&3) | (cell_map(a >> 2) << 2); - Pico_mcd->word_ram1M[1][a ^ 1] = d; + Pico_mcd->word_ram1M[1][MEM_BE2(a)] = d; } static void PicoWriteM68k16_cell0(u32 a, u32 d) @@ -567,24 +670,44 @@ static void PicoWriteM68k16_cell1(u32 a, u32 d) } #endif -// RAM cart (40000 - 7fffff, optional) +// BIOS faking for MSU-MD, checks for "SEGA" at 0x400100 to detect CD drive +static u8 bios_id[4] = "SEGA"; + +static u32 PicoReadM68k8_bios(u32 a) +{ + if ((a & 0xfffffc) == BASE+0x100) // CD detection by MSU + return bios_id[a&3]; + return 0; +} + +static u32 PicoReadM68k16_bios(u32 a) +{ + if ((a & 0xfffffc) == BASE+0x100) // CD detection by MSU + return (bios_id[a&2]<<8) | bios_id[(a&2)+1]; + return 0; +} + +// RAM cart (400000 - 7fffff, optional) static u32 PicoReadM68k8_ramc(u32 a) { u32 d = 0; - if (a == 0x400001) { - if (SRam.data != NULL) - d = 3; // 64k cart - return d; - } - if ((a & 0xfe0000) == 0x600000) { - if (SRam.data != NULL) - d = SRam.data[((a >> 1) & 0xffff) + 0x2000]; - return d; - } + if (PicoIn.opt & POPT_EN_MCD_RAMCART) { + if ((a & 0xf00001) == 0x400001) { + if (Pico.sv.data != NULL) + d = 3; // 64k cart + return d; + } - if (a == 0x7fffff) - return Pico_mcd->m.bcram_reg; + if ((a & 0xf00001) == 0x600001) { + if (Pico.sv.data != NULL) + d = Pico.sv.data[((a >> 1) & 0xffff) + 0x2000]; + return d; + } + + if ((a & 
0xf00001) == 0x700001) + return Pico_mcd->m.bcram_reg; + } elprintf(EL_UIO, "m68k unmapped r8 [%06x] @%06x", a, SekPc); return d; @@ -592,23 +715,25 @@ static u32 PicoReadM68k8_ramc(u32 a) static u32 PicoReadM68k16_ramc(u32 a) { - elprintf(EL_ANOMALY, "ramcart r16: [%06x] @%06x", a, SekPcS68k); + elprintf(EL_ANOMALY, "ramcart r16: [%06x] @%06x", a, SekPc); return PicoReadM68k8_ramc(a + 1); } static void PicoWriteM68k8_ramc(u32 a, u32 d) { - if ((a & 0xfe0000) == 0x600000) { - if (SRam.data != NULL && (Pico_mcd->m.bcram_reg & 1)) { - SRam.data[((a>>1) & 0xffff) + 0x2000] = d; - SRam.changed = 1; + if (PicoIn.opt & POPT_EN_MCD_RAMCART) { + if ((a & 0xf00001) == 0x600001) { + if (Pico.sv.data != NULL && (Pico_mcd->m.bcram_reg & 1)) { + Pico.sv.data[((a >> 1) & 0xffff) + 0x2000] = d; + Pico.sv.changed = 1; + } + return; } - return; - } - if (a == 0x7fffff) { - Pico_mcd->m.bcram_reg = d; - return; + if ((a & 0xf00001) == 0x700001) { + Pico_mcd->m.bcram_reg = d; + return; + } } elprintf(EL_UIO, "m68k unmapped w8 [%06x] %02x @%06x", @@ -715,7 +840,7 @@ static void s68k_unmapped_write16(u32 a, u32 d) static void PicoWriteS68k8_prgwp(u32 a, u32 d) { if (a >= (Pico_mcd->s68k_regs[2] << 9)) - Pico_mcd->prg_ram[a ^ 1] = d; + Pico_mcd->prg_ram[MEM_BE2(a)] = d; } static void PicoWriteS68k16_prgwp(u32 a, u32 d) @@ -729,7 +854,7 @@ static void PicoWriteS68k16_prgwp(u32 a, u32 d) // decode (080000 - 0bffff, in 1M mode) static u32 PicoReadS68k8_dec0(u32 a) { - u32 d = Pico_mcd->word_ram1M[0][((a >> 1) ^ 1) & 0x1ffff]; + u32 d = Pico_mcd->word_ram1M[0][MEM_BE2(a >> 1) & 0x1ffff]; if (a & 1) d &= 0x0f; else @@ -739,7 +864,7 @@ static u32 PicoReadS68k8_dec0(u32 a) static u32 PicoReadS68k8_dec1(u32 a) { - u32 d = Pico_mcd->word_ram1M[1][((a >> 1) ^ 1) & 0x1ffff]; + u32 d = Pico_mcd->word_ram1M[1][MEM_BE2(a >> 1) & 0x1ffff]; if (a & 1) d &= 0x0f; else @@ -749,7 +874,7 @@ static u32 PicoReadS68k8_dec1(u32 a) static u32 PicoReadS68k16_dec0(u32 a) { - u32 d = Pico_mcd->word_ram1M[0][((a >> 
1) ^ 1) & 0x1ffff]; + u32 d = Pico_mcd->word_ram1M[0][MEM_BE2(a >> 1) & 0x1ffff]; d |= d << 4; d &= ~0xf0; return d; @@ -757,7 +882,7 @@ static u32 PicoReadS68k16_dec0(u32 a) static u32 PicoReadS68k16_dec1(u32 a) { - u32 d = Pico_mcd->word_ram1M[1][((a >> 1) ^ 1) & 0x1ffff]; + u32 d = Pico_mcd->word_ram1M[1][MEM_BE2(a >> 1) & 0x1ffff]; d |= d << 4; d &= ~0xf0; return d; @@ -767,7 +892,7 @@ static u32 PicoReadS68k16_dec1(u32 a) #define mk_decode_w8(bank) \ static void PicoWriteS68k8_dec_m0b##bank(u32 a, u32 d) \ { \ - u8 *pd = &Pico_mcd->word_ram1M[bank][((a >> 1) ^ 1) & 0x1ffff]; \ + u8 *pd = &Pico_mcd->word_ram1M[bank][MEM_BE2(a >> 1) & 0x1ffff];\ \ if (!(a & 1)) \ *pd = (*pd & 0x0f) | (d << 4); \ @@ -777,7 +902,7 @@ static void PicoWriteS68k8_dec_m0b##bank(u32 a, u32 d) \ \ static void PicoWriteS68k8_dec_m1b##bank(u32 a, u32 d) \ { \ - u8 *pd = &Pico_mcd->word_ram1M[bank][((a >> 1) ^ 1) & 0x1ffff]; \ + u8 *pd = &Pico_mcd->word_ram1M[bank][MEM_BE2(a >> 1) & 0x1ffff];\ u8 mask = (a & 1) ? 
0x0f : 0xf0; \ \ if (!(*pd & mask) && (d & 0x0f)) /* underwrite */ \ @@ -796,7 +921,7 @@ mk_decode_w8(1) #define mk_decode_w16(bank) \ static void PicoWriteS68k16_dec_m0b##bank(u32 a, u32 d) \ { \ - u8 *pd = &Pico_mcd->word_ram1M[bank][((a >> 1) ^ 1) & 0x1ffff]; \ + u8 *pd = &Pico_mcd->word_ram1M[bank][MEM_BE2(a >> 1) & 0x1ffff];\ \ d &= 0x0f0f; \ *pd = d | (d >> 4); \ @@ -804,7 +929,7 @@ static void PicoWriteS68k16_dec_m0b##bank(u32 a, u32 d) \ \ static void PicoWriteS68k16_dec_m1b##bank(u32 a, u32 d) \ { \ - u8 *pd = &Pico_mcd->word_ram1M[bank][((a >> 1) ^ 1) & 0x1ffff]; \ + u8 *pd = &Pico_mcd->word_ram1M[bank][MEM_BE2(a >> 1) & 0x1ffff];\ \ d &= 0x0f0f; /* underwrite */ \ if (!(*pd & 0xf0)) *pd |= d >> 4; \ @@ -813,7 +938,7 @@ static void PicoWriteS68k16_dec_m1b##bank(u32 a, u32 d) \ \ static void PicoWriteS68k16_dec_m2b##bank(u32 a, u32 d) \ { \ - u8 *pd = &Pico_mcd->word_ram1M[bank][((a >> 1) ^ 1) & 0x1ffff]; \ + u8 *pd = &Pico_mcd->word_ram1M[bank][MEM_BE2(a >> 1) & 0x1ffff];\ \ d &= 0x0f0f; /* overwrite */ \ d |= d >> 4; \ @@ -839,15 +964,16 @@ static u32 PicoReadS68k16_bram(u32 a) u32 d; elprintf(EL_ANOMALY, "FIXME: s68k_bram r16: [%06x] @%06x", a, SekPcS68k); a = (a >> 1) & 0x1fff; - d = Pico_mcd->bram[a++]; - d|= Pico_mcd->bram[a++] << 8; // probably wrong, TODO: verify + d = Pico_mcd->bram[a]; return d; } static void PicoWriteS68k8_bram(u32 a, u32 d) { - Pico_mcd->bram[(a >> 1) & 0x1fff] = d; - SRam.changed = 1; + if (a & 1) { + Pico_mcd->bram[(a >> 1) & 0x1fff] = d; + Pico.sv.changed = 1; + } } static void PicoWriteS68k16_bram(u32 a, u32 d) @@ -855,8 +981,7 @@ static void PicoWriteS68k16_bram(u32 a, u32 d) elprintf(EL_ANOMALY, "s68k_bram w16: [%06x] %04x @%06x", a, d, SekPcS68k); a = (a >> 1) & 0x1fff; Pico_mcd->bram[a++] = d; - Pico_mcd->bram[a++] = d >> 8; // TODO: verify.. 
- SRam.changed = 1; + Pico.sv.changed = 1; } #ifndef _ASM_CD_MEMORY_C @@ -871,7 +996,7 @@ static u32 PicoReadS68k8_pr(u32 a) a &= 0x1ff; if (a >= 0x0e && a < 0x30) { d = Pico_mcd->s68k_regs[a]; - s68k_poll_detect(a & ~1, d); + d = s68k_poll_detect(a & ~1, d); goto regs_done; } d = s68k_reg_read16(a & ~1); @@ -1001,63 +1126,85 @@ static const void *s68k_dec_write16[2][4] = { static void remap_prg_window(u32 r1, u32 r3) { - // PRG RAM - if (r1 & 2) { + // PRG RAM, mapped to main CPU if sub is not running + if ((r1 & 3) != 1) { void *bank = Pico_mcd->prg_ram_b[(r3 >> 6) & 3]; - cpu68k_map_all_ram(0x020000, 0x03ffff, bank, 0); - } - else { - m68k_map_unmap(0x020000, 0x03ffff); + cpu68k_map_all_ram(BASE+0x020000, BASE+0x03ffff, bank, 0); + } else { + m68k_map_unmap(BASE+0x020000, BASE+0x03ffff); } } +// if sub CPU accesses Word-RAM while it is assigned to the main CPU, +// GA doesn't assert DTACK, which means the CPU is blocked until the Word_RAM +// is reassigned to it (e.g. Mega Race). +// since DTACK isn't on the expansion port, main cpu accesses are not blocked. +// XXX is data read/written if main is accessing Word_RAM while not owning it? +static u32 s68k_wordram_main_read8(u32 a) +{ + Pico_mcd->m.state_flags |= PCD_ST_S68K_SLEEP; + SekEndRunS68k(0); + return Pico_mcd->word_ram2M[MEM_BE2(a) & 0x3ffff]; +} + +static u32 s68k_wordram_main_read16(u32 a) +{ + Pico_mcd->m.state_flags |= PCD_ST_S68K_SLEEP; + SekEndRunS68k(0); + return ((u16 *)Pico_mcd->word_ram2M)[(a >> 1) & 0x1ffff]; +} + +static void s68k_wordram_main_write8(u32 a, u32 d) +{ + Pico_mcd->m.state_flags |= PCD_ST_S68K_SLEEP; + SekEndRunS68k(0); + Pico_mcd->word_ram2M[MEM_BE2(a) & 0x3ffff] = d; +} + +static void s68k_wordram_main_write16(u32 a, u32 d) +{ + Pico_mcd->m.state_flags |= PCD_ST_S68K_SLEEP; + SekEndRunS68k(0); + ((u16 *)Pico_mcd->word_ram2M)[(a >> 1) & 0x1ffff] = d; +} + static void remap_word_ram(u32 r3) { void *bank; // WORD RAM if (!(r3 & 4)) { - // 2M mode. 
XXX: allowing access in all cases for simplicity + // 2M mode. bank = Pico_mcd->word_ram2M; - cpu68k_map_all_ram(0x200000, 0x23ffff, bank, 0); - cpu68k_map_all_ram(0x080000, 0x0bffff, bank, 1); + if (r3 & 1) { + cpu68k_map_all_ram(BASE+0x200000, BASE+0x23ffff, bank, 0); + cpu68k_map_all_funcs(0x80000, 0xbffff, + s68k_wordram_main_read8, s68k_wordram_main_read16, + s68k_wordram_main_write8, s68k_wordram_main_write16, 1); + } else { + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_SLEEP; + cpu68k_map_all_ram(0x080000, 0x0bffff, bank, 1); + m68k_map_unmap(BASE+0x200000, BASE+0x23ffff); + } // TODO: handle 0x0c0000 } else { int b0 = r3 & 1; int m = (r3 & 0x18) >> 3; + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_SLEEP; bank = Pico_mcd->word_ram1M[b0]; - cpu68k_map_all_ram(0x200000, 0x21ffff, bank, 0); + cpu68k_map_all_ram(BASE+0x200000, BASE+0x21ffff, bank, 0); bank = Pico_mcd->word_ram1M[b0 ^ 1]; cpu68k_map_all_ram(0x0c0000, 0x0effff, bank, 1); // "cell arrange" on m68k - cpu68k_map_set(m68k_read8_map, 0x220000, 0x23ffff, m68k_cell_read8[b0], 1); - cpu68k_map_set(m68k_read16_map, 0x220000, 0x23ffff, m68k_cell_read16[b0], 1); - cpu68k_map_set(m68k_write8_map, 0x220000, 0x23ffff, m68k_cell_write8[b0], 1); - cpu68k_map_set(m68k_write16_map, 0x220000, 0x23ffff, m68k_cell_write16[b0], 1); + cpu68k_map_all_funcs(BASE+0x220000, BASE+0x23ffff, + m68k_cell_read8[b0], m68k_cell_read16[b0], + m68k_cell_write8[b0], m68k_cell_write16[b0], 0); // "decode format" on s68k - cpu68k_map_set(s68k_read8_map, 0x080000, 0x0bffff, s68k_dec_read8[b0 ^ 1], 1); - cpu68k_map_set(s68k_read16_map, 0x080000, 0x0bffff, s68k_dec_read16[b0 ^ 1], 1); - cpu68k_map_set(s68k_write8_map, 0x080000, 0x0bffff, s68k_dec_write8[b0 ^ 1][m], 1); - cpu68k_map_set(s68k_write16_map, 0x080000, 0x0bffff, s68k_dec_write16[b0 ^ 1][m], 1); + cpu68k_map_all_funcs(0x80000, 0xbffff, + s68k_dec_read8[b0^1], s68k_dec_read16[b0^1], + s68k_dec_write8[b0^1][m], s68k_dec_write16[b0^1][m], 1); } - -#ifdef EMU_F68K - // update fetchmap.. 
- int i; - if (!(r3 & 4)) - { - for (i = M68K_FETCHBANK1*2/16; (i<<(24-FAMEC_FETCHBITS)) < 0x240000; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico_mcd->word_ram2M - 0x200000; - } - else - { - for (i = M68K_FETCHBANK1*2/16; (i<<(24-FAMEC_FETCHBITS)) < 0x220000; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico_mcd->word_ram1M[r3 & 1] - 0x200000; - for (i = M68K_FETCHBANK1*0x0c/0x100; (i<<(24-FAMEC_FETCHBITS)) < 0x0e0000; i++) - PicoCpuFS68k.Fetch[i] = (unsigned long)Pico_mcd->word_ram1M[(r3&1)^1] - 0x0c0000; - } -#endif } void pcd_state_loaded_mem(void) @@ -1072,7 +1219,7 @@ void pcd_state_loaded_mem(void) Pico_mcd->m.dmna_ret_2m &= 3; // restore hint vector - *(unsigned short *)(Pico_mcd->bios + 0x72) = Pico_mcd->m.hint_vector; + *(u16 *)(Pico.rom + 0x72) = Pico_mcd->m.hint_vector; } #ifdef EMU_M68K @@ -1081,12 +1228,22 @@ static void m68k_mem_setup_cd(void); PICO_INTERNAL void PicoMemSetupCD(void) { + if (!Pico_mcd) { + Pico_mcd = plat_mmap(0x05000000, sizeof(mcd_state), 0, 0); + memset(Pico_mcd, 0, sizeof(mcd_state)); + } + pcd_base_address = (Pico.romsize > 0x20000 ? 0x400000 : 0x000000); + // setup default main68k map PicoMemSetup(); - // main68k map (BIOS mapped by PicoMemSetup()): - // RAM cart - if (PicoOpt & POPT_EN_MCD_RAMCART) { + // main68k map (BIOS or MSU mapped by PicoMemSetup()): + if (Pico.romsize > 0x20000) { + // MSU cartridge. 
Fake BIOS detection + cpu68k_map_set(m68k_read8_map, 0x400000, 0x41ffff, PicoReadM68k8_bios, 1); + cpu68k_map_set(m68k_read16_map, 0x400000, 0x41ffff, PicoReadM68k16_bios, 1); + } else { + // RAM cart cpu68k_map_set(m68k_read8_map, 0x400000, 0x7fffff, PicoReadM68k8_ramc, 1); cpu68k_map_set(m68k_read16_map, 0x400000, 0x7fffff, PicoReadM68k16_ramc, 1); cpu68k_map_set(m68k_write8_map, 0x400000, 0x7fffff, PicoWriteM68k8_ramc, 1); @@ -1100,32 +1257,33 @@ PICO_INTERNAL void PicoMemSetupCD(void) cpu68k_map_set(m68k_write16_map, 0xa10000, 0xa1ffff, PicoWrite16_mcd_io, 1); // sub68k map - cpu68k_map_set(s68k_read8_map, 0x000000, 0xffffff, s68k_unmapped_read8, 1); - cpu68k_map_set(s68k_read16_map, 0x000000, 0xffffff, s68k_unmapped_read16, 1); - cpu68k_map_set(s68k_write8_map, 0x000000, 0xffffff, s68k_unmapped_write8, 1); - cpu68k_map_set(s68k_write16_map, 0x000000, 0xffffff, s68k_unmapped_write16, 1); + cpu68k_map_set(s68k_read8_map, 0x000000, 0xffffff, s68k_unmapped_read8, 3); + cpu68k_map_set(s68k_read16_map, 0x000000, 0xffffff, s68k_unmapped_read16, 3); + cpu68k_map_set(s68k_write8_map, 0x000000, 0xffffff, s68k_unmapped_write8, 3); + cpu68k_map_set(s68k_write16_map, 0x000000, 0xffffff, s68k_unmapped_write16, 3); // PRG RAM - cpu68k_map_set(s68k_read8_map, 0x000000, 0x07ffff, Pico_mcd->prg_ram, 0); - cpu68k_map_set(s68k_read16_map, 0x000000, 0x07ffff, Pico_mcd->prg_ram, 0); - cpu68k_map_set(s68k_write8_map, 0x000000, 0x07ffff, Pico_mcd->prg_ram, 0); - cpu68k_map_set(s68k_write16_map, 0x000000, 0x07ffff, Pico_mcd->prg_ram, 0); - cpu68k_map_set(s68k_write8_map, 0x000000, 0x01ffff, PicoWriteS68k8_prgwp, 1); - cpu68k_map_set(s68k_write16_map, 0x000000, 0x01ffff, PicoWriteS68k16_prgwp, 1); + cpu68k_map_set(s68k_read8_map, 0x000000, 0x07ffff, Pico_mcd->prg_ram, 2); + cpu68k_map_set(s68k_read16_map, 0x000000, 0x07ffff, Pico_mcd->prg_ram, 2); + cpu68k_map_set(s68k_write8_map, 0x000000, 0x07ffff, Pico_mcd->prg_ram, 2); + cpu68k_map_set(s68k_write16_map, 0x000000, 0x07ffff, 
Pico_mcd->prg_ram, 2); + cpu68k_map_set(s68k_write8_map, 0x000000, 0x01ffff, PicoWriteS68k8_prgwp, 3); + cpu68k_map_set(s68k_write16_map, 0x000000, 0x01ffff, PicoWriteS68k16_prgwp, 3); // BRAM - cpu68k_map_set(s68k_read8_map, 0xfe0000, 0xfeffff, PicoReadS68k8_bram, 1); - cpu68k_map_set(s68k_read16_map, 0xfe0000, 0xfeffff, PicoReadS68k16_bram, 1); - cpu68k_map_set(s68k_write8_map, 0xfe0000, 0xfeffff, PicoWriteS68k8_bram, 1); - cpu68k_map_set(s68k_write16_map, 0xfe0000, 0xfeffff, PicoWriteS68k16_bram, 1); + cpu68k_map_set(s68k_read8_map, 0xfe0000, 0xfeffff, PicoReadS68k8_bram, 3); + cpu68k_map_set(s68k_read16_map, 0xfe0000, 0xfeffff, PicoReadS68k16_bram, 3); + cpu68k_map_set(s68k_write8_map, 0xfe0000, 0xfeffff, PicoWriteS68k8_bram, 3); + cpu68k_map_set(s68k_write16_map, 0xfe0000, 0xfeffff, PicoWriteS68k16_bram, 3); // PCM, regs - cpu68k_map_set(s68k_read8_map, 0xff0000, 0xffffff, PicoReadS68k8_pr, 1); - cpu68k_map_set(s68k_read16_map, 0xff0000, 0xffffff, PicoReadS68k16_pr, 1); - cpu68k_map_set(s68k_write8_map, 0xff0000, 0xffffff, PicoWriteS68k8_pr, 1); - cpu68k_map_set(s68k_write16_map, 0xff0000, 0xffffff, PicoWriteS68k16_pr, 1); + cpu68k_map_set(s68k_read8_map, 0xff0000, 0xffffff, PicoReadS68k8_pr, 3); + cpu68k_map_set(s68k_read16_map, 0xff0000, 0xffffff, PicoReadS68k16_pr, 3); + cpu68k_map_set(s68k_write8_map, 0xff0000, 0xffffff, PicoWriteS68k8_pr, 3); + cpu68k_map_set(s68k_write16_map, 0xff0000, 0xffffff, PicoWriteS68k16_pr, 3); // RAMs + remap_prg_window(2,1); remap_word_ram(1); #ifdef EMU_C68K @@ -1143,38 +1301,12 @@ PICO_INTERNAL void PicoMemSetupCD(void) #endif #ifdef EMU_F68K // s68k - PicoCpuFS68k.read_byte = s68k_read8; - PicoCpuFS68k.read_word = s68k_read16; - PicoCpuFS68k.read_long = s68k_read32; - PicoCpuFS68k.write_byte = s68k_write8; - PicoCpuFS68k.write_word = s68k_write16; - PicoCpuFS68k.write_long = s68k_write32; - - // setup FAME fetchmap - { - int i; - // M68k - // by default, point everything to fitst 64k of ROM (BIOS) - for (i = 0; i < 
M68K_FETCHBANK1; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom - (i<<(24-FAMEC_FETCHBITS)); - // now real ROM (BIOS) - for (i = 0; i < M68K_FETCHBANK1 && (i<<(24-FAMEC_FETCHBITS)) < Pico.romsize; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom; - // .. and RAM - for (i = M68K_FETCHBANK1*14/16; i < M68K_FETCHBANK1; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.ram - (i<<(24-FAMEC_FETCHBITS)); - // S68k - // PRG RAM is default - for (i = 0; i < M68K_FETCHBANK1; i++) - PicoCpuFS68k.Fetch[i] = (unsigned long)Pico_mcd->prg_ram - (i<<(24-FAMEC_FETCHBITS)); - // real PRG RAM - for (i = 0; i < M68K_FETCHBANK1 && (i<<(24-FAMEC_FETCHBITS)) < 0x80000; i++) - PicoCpuFS68k.Fetch[i] = (unsigned long)Pico_mcd->prg_ram; - // WORD RAM 2M area - for (i = M68K_FETCHBANK1*0x08/0x100; i < M68K_FETCHBANK1 && (i<<(24-FAMEC_FETCHBITS)) < 0xc0000; i++) - PicoCpuFS68k.Fetch[i] = (unsigned long)Pico_mcd->word_ram2M - 0x80000; - // remap_word_ram() will setup word ram for both - } + PicoCpuFS68k.read_byte = (void *)s68k_read8; + PicoCpuFS68k.read_word = (void *)s68k_read16; + PicoCpuFS68k.read_long = (void *)s68k_read32; + PicoCpuFS68k.write_byte = (void *)s68k_write8; + PicoCpuFS68k.write_word = (void *)s68k_write16; + PicoCpuFS68k.write_long = (void *)s68k_write32; #endif #ifdef EMU_M68K m68k_mem_setup_cd(); diff --git a/pico/cd/memory_arm.s b/pico/cd/memory_arm.S similarity index 72% rename from pico/cd/memory_arm.s rename to pico/cd/memory_arm.S index e19c5613..810fe747 100644 --- a/pico/cd/memory_arm.s +++ b/pico/cd/memory_arm.S @@ -6,6 +6,9 @@ @* See COPYING file in the top-level directory. 
@* +#include +#include + .equiv PCM_STEP_SHIFT, 11 .text @@ -48,8 +51,8 @@ .global PicoWriteS68k16_dec_m2b1 @ externs, just for reference -.extern Pico -.extern Read_CDC_Host +.extern Pico_mcd +.extern cdc_host_r .extern m68k_reg_write8 .extern s68k_reg_read16 .extern s68k_reg_write8 @@ -57,13 +60,13 @@ .extern s68k_poll_detect .extern pcd_pcm_write .extern pcd_pcm_read -.extern PicoCpuCS68k .extern PicoRead8_io .extern PicoRead16_io .extern PicoWrite8_io .extern PicoWrite16_io .extern m68k_comm_check + PIC_LDR_INIT() @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -72,16 +75,16 @@ @ r0=addr[in,out], r1,r2=tmp .macro cell_map ands r1, r0, #0x01c000 - ldrne pc, [pc, r1, lsr #12] - beq 0f @ most common? - .long 0f - .long 0f - .long 0f - .long 0f - .long 1f - .long 1f - .long 2f - .long 3f + PIC_XB(ne ,r1, lsr #12) + b 0f @ most common? + PIC_BT(0f) + PIC_BT(0f) + PIC_BT(0f) + PIC_BT(0f) + PIC_BT(1f) + PIC_BT(1f) + PIC_BT(2f) + PIC_BT(3f) 1: @ x16 cells and r1, r0, #0x7e00 @ col and r2, r0, #0x01fc @ row @@ -127,9 +130,9 @@ PicoReadM68k8_cell1: @ 0x220000 - 0x23ffff, cell arranged mov r3, #0x0e0000 0: cell_map - ldr r1, =(Pico+0x22200) + PIC_LDR(r1, r2, Pico_mcd) add r0, r0, r3 - ldr r1, [r1] + ldr r1, [r1] @ Pico.mcd (used everywhere) eor r0, r0, #1 ldrb r0, [r1, r0] bx lr @@ -140,31 +143,35 @@ PicoRead8_mcd_io: cmp r1, #0x2000 @ a120xx? 
bne PicoRead8_io - ldr r1, =(Pico+0x22200) + PIC_LDR(r1, r2, Pico_mcd) and r0, r0, #0x3f - ldr r1, [r1] @ Pico.mcd (used everywhere) + ldr r1, [r1] @ Pico.mcd cmp r0, #0x0e - ldrlt pc, [pc, r0, lsl #2] + PIC_XB(lt ,r0, lsl #2) b m_m68k_read8_hi - .long m_m68k_read8_r00 - .long m_m68k_read8_r01 - .long m_m68k_read8_r02 - .long m_m68k_read8_r03 - .long m_m68k_read8_r04 - .long m_read_null @ unused bits - .long m_m68k_read8_r06 - .long m_m68k_read8_r07 - .long m_m68k_read8_r08 - .long m_m68k_read8_r09 - .long m_read_null @ reserved - .long m_read_null - .long m_m68k_read8_r0c - .long m_m68k_read8_r0d + PIC_BT(m_m68k_read8_r00) + PIC_BT(m_m68k_read8_r01) + PIC_BT(m_m68k_read8_r02) + PIC_BT(m_m68k_read8_r03) + PIC_BT(m_m68k_read8_r04) + PIC_BT(m_read_null) @ unused bits + PIC_BT(m_m68k_read8_r06) + PIC_BT(m_m68k_read8_r07) + PIC_BT(m_m68k_read8_r08) + PIC_BT(m_m68k_read8_r09) + PIC_BT(m_read_null) @ reserved + PIC_BT(m_read_null) + PIC_BT(m_m68k_read8_r0c) + PIC_BT(m_m68k_read8_r0d) m_m68k_read8_r00: add r1, r1, #0x110000 - ldr r0, [r1, #0x30] + ldr r0, [r1, #0x30] @ Pico_mcd->s68k_regs + add r1, r1, #0x002200 + ldr r1, [r1, #4] @ Pico_mcd->m.state_flags and r0, r0, #0x04000000 @ we need irq2 mask state mov r0, r0, lsr #19 + and r1, r1, #0x00000100 @ irq2 pending + orr r0, r0, r1, lsr #8 bx lr m_m68k_read8_r01: add r1, r1, #0x110000 @@ -177,9 +184,10 @@ m_m68k_read8_r02: bx lr m_m68k_read8_r03: add r1, r1, #0x110000 - push {r1, lr} + stmfd sp!, {r1, lr} + bic r0, r0, #1 bl m68k_comm_check - pop {r1, lr} + ldmfd sp!, {r1, lr} ldrb r0, [r1, #3] and r0, r0, #0xc7 bx lr @@ -191,37 +199,36 @@ m_m68k_read8_r06: ldrb r0, [r1, #0x73] @ IRQ vector bx lr m_m68k_read8_r07: + PIC_LDR(r1, r2, Pico) + ldr r1, [r1, #OFS_Pico_rom] ldrb r0, [r1, #0x72] bx lr m_m68k_read8_r08: mov r0, #0 - bl Read_CDC_Host @ TODO: make it local + bl cdc_host_r mov r0, r0, lsr #8 bx lr m_m68k_read8_r09: mov r0, #0 - b Read_CDC_Host + b cdc_host_r m_m68k_read8_r0c: - add r1, r1, #0x110000 - add r1, r1, 
#0x002200 - ldr r0, [r1, #0x14] @ Pico_mcd->m.timer_stopwatch - mov r0, r0, lsr #24 + mov r1, #0 + bl pcd_stopwatch_read + mov r0, r0, lsr #8 bx lr m_m68k_read8_r0d: - add r1, r1, #0x110000 - add r1, r1, #0x002200 - ldr r0, [r1, #0x14] - mov r0, r0, lsr #16 - bx lr + mov r1, #0 + b pcd_stopwatch_read m_m68k_read8_hi: cmp r0, #0x30 add r1, r1, #0x110000 movge r0, #0 bxge lr - add r1, r0 - push {r1, lr} + add r1, r1, r0 + stmfd sp!, {r1, lr} + bic r0, r0, #1 bl m68k_comm_check - pop {r1, lr} + ldmfd sp!, {r1, lr} ldrb r0, [r1] bx lr @@ -237,9 +244,9 @@ PicoReadM68k16_cell1: @ 0x220000 - 0x23ffff, cell arranged mov r3, #0x0e0000 0: cell_map - ldr r1, =(Pico+0x22200) + PIC_LDR(r1, r2, Pico_mcd) add r0, r0, r3 - ldr r1, [r1] + ldr r1, [r1] @ Pico.mcd bic r0, r0, #1 ldrh r0, [r1, r0] bx lr @@ -251,32 +258,35 @@ PicoRead16_mcd_io: bne PicoRead16_io m_m68k_read16_m68k_regs: - ldr r1, =(Pico+0x22200) + PIC_LDR(r1, r2, Pico_mcd) and r0, r0, #0x3e - ldr r1, [r1] @ Pico.mcd (used everywhere) + ldr r1, [r1] @ Pico.mcd cmp r0, #0x0e - ldrlt pc, [pc, r0, lsl #1] + PIC_XB(lt ,r0, lsl #1) b m_m68k_read16_hi - .long m_m68k_read16_r00 - .long m_m68k_read16_r02 - .long m_m68k_read16_r04 - .long m_m68k_read16_r06 - .long m_m68k_read16_r08 - .long m_read_null @ reserved - .long m_m68k_read16_r0c + PIC_BT(m_m68k_read16_r00) + PIC_BT(m_m68k_read16_r02) + PIC_BT(m_m68k_read16_r04) + PIC_BT(m_m68k_read16_r06) + PIC_BT(m_m68k_read16_r08) + PIC_BT(m_read_null) @ reserved + PIC_BT(m_m68k_read16_r0c) m_m68k_read16_r00: add r1, r1, #0x110000 ldr r0, [r1, #0x30] add r1, r1, #0x002200 - ldrb r1, [r1, #2] @ Pico_mcd->m.busreq + ldrb r2, [r1, #2] @ Pico_mcd->m.busreq + ldr r1, [r1, #4] and r0, r0, #0x04000000 @ we need irq2 mask state - orr r0, r1, r0, lsr #11 + orr r0, r2, r0, lsr #11 + and r1, r1, #0x00000100 @ irq2 pending + orr r0, r1, r0 bx lr m_m68k_read16_r02: add r1, r1, #0x110000 - push {r1, lr} + stmfd sp!, {r1, lr} bl m68k_comm_check - pop {r1, lr} + ldmfd sp!, {r1, lr} ldrb r2, [r1, #3] 
ldrb r0, [r1, #2] and r2, r2, #0xc7 @@ -288,17 +298,16 @@ m_m68k_read16_r04: mov r0, r0, lsl #8 bx lr m_m68k_read16_r06: + PIC_LDR(r1, r2, Pico) + ldr r1, [r1, #OFS_Pico_rom] ldrh r0, [r1, #0x72] @ IRQ vector bx lr m_m68k_read16_r08: mov r0, #0 - b Read_CDC_Host + b cdc_host_r m_m68k_read16_r0c: - add r1, r1, #0x110000 - add r1, r1, #0x002200 - ldr r0, [r1, #0x14] - mov r0, r0, lsr #16 - bx lr + mov r1, #0 + b pcd_stopwatch_read m_m68k_read16_hi: cmp r0, #0x30 add r1, r1, #0x110000 @@ -306,9 +315,9 @@ m_m68k_read16_hi: bxge lr add r1, r0, r1 - push {r1, lr} + stmfd sp!, {r1, lr} bl m68k_comm_check - pop {r0, lr} + ldmfd sp!, {r0, lr} ldrh r0, [r0] mov r1, r0, lsr #8 and r0, r0, #0xff @@ -328,8 +337,9 @@ PicoWriteM68k8_cell1: @ 0x220000 - 0x23ffff, cell arranged 0: mov r3, r1 cell_map - ldr r2, =(Pico+0x22200) + PIC_LDR(r2, r1, Pico_mcd) add r0, r0, r12 + ldr r2, [r2] @ Pico.mcd ldr r2, [r2] eor r0, r0, #1 strb r3, [r2, r0] @@ -355,9 +365,9 @@ PicoWriteM68k16_cell1: @ 0x220000 - 0x23ffff, cell arranged 0: mov r3, r1 cell_map - ldr r1, =(Pico+0x22200) + PIC_LDR(r1, r2, Pico_mcd) add r0, r0, r12 - ldr r1, [r1] + ldr r1, [r1] @ Pico.mcd bic r0, r0, #1 strh r3, [r1, r0] bx lr @@ -374,10 +384,10 @@ m_m68k_write16_regs: beq m_m68k_write16_regs_spec and r3, r1, #0xff add r2, r0, #1 - stmfd sp!,{r2,r3,lr} + stmfd sp!,{r2,r3,r12,lr} mov r1, r1, lsr #8 bl m68k_reg_write8 - ldmfd sp!,{r0,r1,lr} + ldmfd sp!,{r0,r1,r12,lr} b m68k_reg_write8 m_m68k_write16_regs_spec: @ special case @@ -397,9 +407,9 @@ PicoReadS68k8_dec0: @ 0x080000 - 0x0bffff PicoReadS68k8_dec1: mov r3, #0x0a0000 @ + ^ / 2 0: - ldr r2, =(Pico+0x22200) + PIC_LDR(r2, r1, Pico_mcd) eor r0, r0, #2 - ldr r2, [r2] + ldr r2, [r2] @ Pico.mcd movs r0, r0, lsr #1 @ +4-6 <<16 add r2, r2, r3 @ map to our address ldrb r0, [r2, r0] @@ -429,8 +439,8 @@ m_s68k_read8_regs: bx lr m_s68k_read8_comm: - ldr r1, =(Pico+0x22200) - ldr r1, [r1] + PIC_LDR(r1, r2, Pico_mcd) + ldr r1, [r1] @ Pico.mcd add r1, r1, #0x110000 ldrb r1, [r1, r0] 
bic r0, r0, #1 @@ -442,9 +452,9 @@ m_s68k_read8_pcm: bne m_read_null @ must not trash r3 and r12 - ldr r1, =(Pico+0x22200) + PIC_LDR(r1, r2, Pico_mcd) bic r0, r0, #0xff0000 - ldr r1, [r1] + ldr r1, [r1] @ Pico.mcd mov r2, #0x110000 orr r2, r2, #0x002200 cmp r0, #0x2000 @@ -477,9 +487,9 @@ PicoReadS68k16_dec0: @ 0x080000 - 0x0bffff PicoReadS68k16_dec1: mov r3, #0x0a0000 @ + ^ / 2 0: - ldr r2, =(Pico+0x22200) + PIC_LDR(r2, r1, Pico_mcd) eor r0, r0, #2 - ldr r2, [r2] + ldr r2, [r2] @ Pico.mcd mov r0, r0, lsr #1 @ +4-6 <<16 add r2, r2, r3 @ map to our address ldrb r0, [r2, r0] @@ -501,16 +511,15 @@ m_s68k_read16_regs: cmp r0, #8 bne s68k_reg_read16 mov r0, #1 - b Read_CDC_Host - + b cdc_host_r @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ .macro m_s68k_write8_2M_decode - ldr r2, =(Pico+0x22200) + PIC_LDR(r2, ip, Pico_mcd) eor r0, r0, #2 - ldr r2, [r2] @ Pico.rom + ldr r2, [r2] @ Pico.mcd movs r0, r0, lsr #1 @ +4-6 <<16 add r2, r2, r3 @ map to our address .endm @@ -592,9 +601,9 @@ m_s68k_write8_pcm: bxlt lr m_s68k_write8_pcm_ram: - ldr r3, =(Pico+0x22200) + PIC_LDR(r3, r2, Pico_mcd) bic r0, r0, #0x00e000 - ldr r3, [r3] + ldr r3, [r3] @ Pico.mcd mov r0, r0, lsr #1 add r2, r3, #0x110000 add r2, r2, #0x002200 @@ -606,14 +615,13 @@ m_s68k_write8_pcm_ram: strb r1, [r3, r0] bx lr - @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ .macro m_s68k_write16_2M_decode - ldr r2, =(Pico+0x22200) + PIC_LDR(r2, ip, Pico_mcd) eor r0, r0, #2 - ldr r2, [r2] + ldr r2, [r2] @ Pico.mcd mov r0, r0, lsr #1 @ +4-6 <<16 add r2, r2, r3 @ map to our address .endm @@ -692,13 +700,110 @@ m_s68k_write16_regs: bne s68k_reg_write16 m_s68k_write16_regs_spec: @ special case - ldr r2, =(Pico+0x22200) + PIC_LDR(r2, r0, Pico_mcd) mov r0, #0x110000 - ldr r2, [r2] + ldr r2, [r2] @ Pico.mcd add r0, r0, #0x00000f strb r1, [r2, r0] @ if (a == 0xe) s68k_regs[0xf] = d; bx lr +.global s68k_read8 +.global s68k_read16 +.global s68k_read32 +.global 
s68k_write8 +.global s68k_write16 +.global s68k_write32 + +s68k_read8: + PIC_LDR(r3, r2, s68k_read8_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + eor r2, r0, #1 + movs r3, r3, lsl #1 + ldrccb r0, [r3, r2] + bxcc lr + bx r3 + +s68k_read16: + PIC_LDR(r3, r2, s68k_read16_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + ldrcch r0, [r3, r0] + bxcc lr + bx r3 + +s68k_read32: + PIC_LDR(r3, r2, s68k_read16_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + ldrcch r1, [r3, r0]! + ldrcch r0, [r3, #2] + orrcc r0, r0, r1, lsl #16 + bxcc lr + + stmfd sp!, {r0, r3, r4, lr} + mov lr, pc + bx r3 + ldmfd sp!, {r1, r3} + str r0, [sp] + add r0, r1, #2 + mov lr, pc + bx r3 + ldmfd sp!, {r1, lr} + mov r0, r0, lsl #16 + mov r1, r1, lsl #16 + orr r0, r1, r0, lsr #16 + bx lr + +s68k_write8: + PIC_LDR(r3, r2, s68k_write8_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + eor r2, r0, #1 + movs r3, r3, lsl #1 + strccb r1, [r3, r2] + bxcc lr + bx r3 + +s68k_write16: + PIC_LDR(r3, r2, s68k_write16_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + strcch r1, [r3, r0] + bxcc lr + bx r3 + +s68k_write32: + PIC_LDR(r3, r2, s68k_write16_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + movcc r2, r1, lsr #16 + strcch r2, [r3, r0]! 
+ strcch r1, [r3, #2] + bxcc lr + + stmfd sp!, {r0, r1, r3, lr} + mov r1, r1, lsr #16 + mov lr, pc + bx r3 + ldmfd sp!, {r0, r1, r3, lr} + add r0, r0, #2 + bx r3 + .pool @ vim:filetype=armasm diff --git a/pico/cd/pcm.c b/pico/cd/pcm.c index 77bfa300..b38ce2d6 100644 --- a/pico/cd/pcm.c +++ b/pico/cd/pcm.c @@ -88,29 +88,30 @@ void pcd_pcm_sync(unsigned int to) } addr = ch->addr; - inc = *(unsigned short *)&ch->regs[2]; - mul_l = ((int)ch->regs[0] * (ch->regs[1] & 0xf)) >> (5+1); - mul_r = ((int)ch->regs[0] * (ch->regs[1] >> 4)) >> (5+1); + inc = ch->regs[2] + (ch->regs[3]<<8); + mul_l = (int)ch->regs[0] * (ch->regs[1] & 0xf); + mul_r = (int)ch->regs[0] * (ch->regs[1] >> 4); - for (s = 0; s < steps; s++, addr = (addr + inc) & 0x7FFFFFF) + for (s = 0; s < steps; s++) { smp = Pico_mcd->pcm_ram[addr >> PCM_STEP_SHIFT]; // test for loop signal if (smp == 0xff) { - addr = *(unsigned short *)&ch->regs[4]; // loop_addr + addr = ch->regs[4] + (ch->regs[5]<<8); // loop_addr smp = Pico_mcd->pcm_ram[addr]; addr <<= PCM_STEP_SHIFT; if (smp == 0xff) break; - } + } else + addr = (addr + inc) & 0x07FFFFFF; if (smp & 0x80) smp = -(smp & 0x7f); - out[s*2 ] += smp * mul_l; // max 128 * 119 = 15232 - out[s*2+1] += smp * mul_r; + out[s*2 ] += (smp * mul_l) >> 5; // max 127 * 255 * 15 / 32 = 15180 + out[s*2+1] += (smp * mul_r) >> 5; } ch->addr = addr; } @@ -120,14 +121,14 @@ end: Pico_mcd->pcm_mixpos += steps; } -void pcd_pcm_update(int *buf32, int length, int stereo) +void pcd_pcm_update(s32 *buf32, int length, int stereo) { int step, *pcm; int p = 0; pcd_pcm_sync(SekCyclesDoneS68k()); - if (!Pico_mcd->pcm_mixbuf_dirty || !(PicoOpt & POPT_EN_MCD_PCM)) + if (!Pico_mcd->pcm_mixbuf_dirty || !(PicoIn.opt & POPT_EN_MCD_PCM) || !buf32) goto out; step = (Pico_mcd->pcm_mixpos << 16) / length; diff --git a/pico/cd/sek.c b/pico/cd/sek.c index 8d838628..9bb72829 100644 --- a/pico/cd/sek.c +++ b/pico/cd/sek.c @@ -32,6 +32,8 @@ static int new_irq_level(int level) { int level_new = 0, irqs; 
Pico_mcd->m.s68k_pend_ints &= ~(1 << level); + if (level == 2) // clear pending bit + Pico_mcd->m.state_flags &= ~PCD_ST_S68K_IFL2; irqs = Pico_mcd->m.s68k_pend_ints; irqs &= Pico_mcd->s68k_regs[0x33]; while ((irqs >>= 1)) level_new++; @@ -117,15 +119,10 @@ PICO_INTERNAL void SekInitS68k(void) } #endif #ifdef EMU_F68K - { - void *oldcontext = g_m68kcontext; - g_m68kcontext = &PicoCpuFS68k; - memset(&PicoCpuFS68k, 0, sizeof(PicoCpuFS68k)); - fm68k_init(); - PicoCpuFS68k.iack_handler = SekIntAckFS68k; - PicoCpuFS68k.sr = 0x2704; // Z flag - g_m68kcontext = oldcontext; - } + memset(&PicoCpuFS68k, 0, sizeof(PicoCpuFS68k)); + fm68k_init(); + PicoCpuFS68k.iack_handler = SekIntAckFS68k; + PicoCpuFS68k.sr = 0x2704; // Z flag #endif } @@ -149,12 +146,7 @@ PICO_INTERNAL int SekResetS68k(void) } #endif #ifdef EMU_F68K - { - void *oldcontext = g_m68kcontext; - g_m68kcontext = &PicoCpuFS68k; - fm68k_reset(); - g_m68kcontext = oldcontext; - } + fm68k_reset(&PicoCpuFS68k); #endif return 0; @@ -178,10 +170,8 @@ PICO_INTERNAL int SekInterruptS68k(int irq) PicoCpuCS68k.irq=real_irq; #endif #ifdef EMU_M68K - void *oldcontext = m68ki_cpu_p; - m68k_set_context(&PicoCpuMS68k); - m68k_set_irq(real_irq); - m68k_set_context(oldcontext); + // avoid m68k_set_irq() for delaying to work + PicoCpuMS68k.int_level = real_irq << 8; #endif #ifdef EMU_F68K PicoCpuFS68k.interrupts[0]=real_irq; @@ -189,3 +179,17 @@ PICO_INTERNAL int SekInterruptS68k(int irq) return 0; } +void SekInterruptClearS68k(int irq) +{ + int level_new = new_irq_level(irq); + +#ifdef EMU_C68K + PicoCpuCS68k.irq = level_new; +#endif +#ifdef EMU_M68K + CPU_INT_LEVEL = level_new << 8; +#endif +#ifdef EMU_F68K + PicoCpuFS68k.interrupts[0] = level_new; +#endif +} diff --git a/pico/debug.c b/pico/debug.c index 959331f8..9ee58d2a 100644 --- a/pico/debug.c +++ b/pico/debug.c @@ -8,6 +8,7 @@ #include "pico_int.h" #include "sound/ym2612.h" +#include "memory.h" #include "debug.h" #define bit(r, x) ((r>>x)&1) @@ -20,13 +21,12 @@ char 
*PDebugMain(void) { struct PicoVideo *pv=&Pico.video; unsigned char *reg=pv->reg, r; - extern int HighPreSpr[]; int i, sprites_lo, sprites_hi; char *dstrp; sprites_lo = sprites_hi = 0; - for (i = 0; HighPreSpr[i] != 0; i+=2) - if (HighPreSpr[i+1] & 0x8000) + for (i = 0; Pico.est.HighPreSpr[i] != 0; i+=2) + if (Pico.est.HighPreSpr[i+1] & 0x8000) sprites_hi++; else sprites_lo++; @@ -40,13 +40,19 @@ char *PDebugMain(void) sprintf(dstrp, "mode set 4: %02x\n", (r=reg[0xC])); MVP; sprintf(dstrp, "interlace: %i%i, cells: %i, shadow: %i\n", bit(r,2), bit(r,1), (r&0x80) ? 40 : 32, bit(r,3)); MVP; sprintf(dstrp, "scroll size: w: %i, h: %i SRAM: %i; eeprom: %i (%i)\n", reg[0x10]&3, (reg[0x10]&0x30)>>4, - !!(SRam.flags & SRF_ENABLED), !!(SRam.flags & SRF_EEPROM), SRam.eeprom_type); MVP; - sprintf(dstrp, "sram range: %06x-%06x, reg: %02x\n", SRam.start, SRam.end, Pico.m.sram_reg); MVP; + !!(Pico.sv.flags & SRF_ENABLED), !!(Pico.sv.flags & SRF_EEPROM), Pico.sv.eeprom_type); MVP; + sprintf(dstrp, "sram range: %06x-%06x, reg: %02x\n", Pico.sv.start, Pico.sv.end, Pico.m.sram_reg); MVP; sprintf(dstrp, "pend int: v:%i, h:%i, vdp status: %04x\n", bit(pv->pending_ints,5), bit(pv->pending_ints,4), pv->status); MVP; - sprintf(dstrp, "pal: %i, hw: %02x, frame#: %i, cycles: %i\n", Pico.m.pal, Pico.m.hardware, Pico.m.frame_count, SekCyclesDone()); MVP; - sprintf(dstrp, "M68k: PC: %06x, SR: %04x, irql: %i\n", SekPc, SekSr, SekIrqLevel); MVP; + sprintf(dstrp, "VDP regs 00-07: %02x %02x %02x %02x %02x %02x %02x %02x\n",reg[0],reg[1],reg[2],reg[3],reg[4],reg[5],reg[6],reg[7]); MVP; + sprintf(dstrp, "VDP regs 08-0f: %02x %02x %02x %02x %02x %02x %02x %02x\n",reg[8],reg[9],reg[10],reg[11],reg[12],reg[13],reg[14],reg[15]); MVP; + sprintf(dstrp, "VDP regs 10-17: %02x %02x %02x %02x %02x %02x %02x %02x\n",reg[16],reg[17],reg[18],reg[19],reg[20],reg[21],reg[22],reg[23]); MVP; + sprintf(dstrp, "VDP regs 18-1f: %02x %02x %02x %02x %02x %02x %02x 
%02x\n",reg[24],reg[25],reg[26],reg[27],reg[28],reg[29],reg[30],reg[31]); MVP; + r = (reg[5]<<9)+(reg[6]<<11); + sprintf(dstrp, "sprite #0: %04x %04x %04x %04x\n",PicoMem.vram[r/2],PicoMem.vram[r/2+1],PicoMem.vram[r/2+2],PicoMem.vram[r/2+3]); MVP; + sprintf(dstrp, "pal: %i, hw: %02x, frame#: %i, cycles: %u\n", Pico.m.pal, Pico.m.hardware, Pico.m.frame_count, SekCyclesDone()); MVP; + sprintf(dstrp, "M68k: PC: %06lx, SR: %04x, irql: %i\n", (ulong)SekPc, SekSr, SekIrqLevel); MVP; for (r = 0; r < 8; r++) { - sprintf(dstrp, "d%i=%08x, a%i=%08x\n", r, SekDar(r), r, SekDar(r+8)); MVP; + sprintf(dstrp, "d%i=%08lx, a%i=%08lx\n", r, (ulong)SekDar(r), r, (ulong)SekDar(r+8)); MVP; } sprintf(dstrp, "z80Run: %i, z80_reset: %i, z80_bnk: %06x\n", Pico.m.z80Run, Pico.m.z80_reset, Pico.m.z80_bank68k<<15); MVP; z80_debug(dstrp); MVP; @@ -71,7 +77,7 @@ char *PDebug32x(void) } r = Pico32x.sh2_regs; sprintf(dstrp, "SH: %04x %04x %04x IRQs: %02x eflags: %02x\n", - r[0], r[1], r[2], Pico32x.sh2irqs, Pico32x.emu_flags); MVP; + r[0], r[1], r[2], Pico32x.sh2irqi[0]|Pico32x.sh2irqi[1], Pico32x.emu_flags); MVP; i = 0; r = Pico32x.vdp_regs; @@ -80,12 +86,12 @@ char *PDebug32x(void) i*2, r[i+0], r[i+1], r[i+2], r[i+3], r[i+4], r[i+5], r[i+6], r[i+7]); MVP; sprintf(dstrp, " mSH2 sSH2\n"); MVP; - sprintf(dstrp, "PC,SR %08x, %03x %08x, %03x\n", sh2_pc(&msh2), sh2_sr(0), sh2_pc(&ssh2), sh2_sr(1)); MVP; + sprintf(dstrp, "PC,SR %08lx, %03x %08lx, %03x\n", (ulong)sh2_pc(&msh2), (uint)sh2_sr(0), (ulong)sh2_pc(&ssh2), (uint)sh2_sr(1)); MVP; for (i = 0; i < 16/2; i++) { - sprintf(dstrp, "R%d,%2d %08x,%08x %08x,%08x\n", i, i + 8, - sh2_reg(0,i), sh2_reg(0,i+8), sh2_reg(1,i), sh2_reg(1,i+8)); MVP; + sprintf(dstrp, "R%d,%2d %08lx,%08lx %08lx,%08lx\n", i, i + 8, + (ulong)sh2_reg(0,i), (ulong)sh2_reg(0,i+8), (ulong)sh2_reg(1,i), (ulong)sh2_reg(1,i+8)); MVP; } - sprintf(dstrp, "gb,vb %08x,%08x %08x,%08x\n", sh2_gbr(0), sh2_vbr(0), sh2_gbr(1), sh2_vbr(1)); MVP; + sprintf(dstrp, "gb,vb %08lx,%08lx %08lx,%08lx\n", 
(ulong)sh2_gbr(0), (ulong)sh2_vbr(0), (ulong)sh2_gbr(1), (ulong)sh2_vbr(1)); MVP; sprintf(dstrp, "IRQs/mask: %02x/%02x %02x/%02x\n", Pico32x.sh2irqi[0], Pico32x.sh2irq_mask[0], Pico32x.sh2irqi[1], Pico32x.sh2irq_mask[1]); MVP; #else @@ -117,7 +123,7 @@ char *PDebugSpriteList(void) unsigned int *sprite; int code, code2, sx, sy, height; - sprite=(unsigned int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(unsigned int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite // get sprite info code = sprite[0]; @@ -142,7 +148,11 @@ char *PDebugSpriteList(void) } #define GREEN1 0x0700 -#ifdef USE_BGR555 +#if defined(USE_BGR555) + #define YELLOW1 0x039c + #define BLUE1 0x7800 + #define RED1 0x001e +#elif defined(USE_BGR565) #define YELLOW1 0x071c #define BLUE1 0xf000 #define RED1 0x001e @@ -197,29 +207,30 @@ void PDebugShowSpriteStats(unsigned short *screen, int stride) void PDebugShowPalette(unsigned short *screen, int stride) { + struct PicoEState *est = &Pico.est; int x, y; Pico.m.dirtyPal = 1; - if (PicoAHW & PAHW_SMS) - PicoDoHighPal555M4(); + if (PicoIn.AHW & PAHW_SMS) + PicoDoHighPal555SMS(); else - PicoDoHighPal555(1); + PicoDoHighPal555(1, 0, est); Pico.m.dirtyPal = 1; screen += 16*stride+8; for (y = 0; y < 8*4; y++) for (x = 0; x < 8*16; x++) - screen[x + y*stride] = HighPal[x/8 + (y/8)*16]; + screen[x + y*stride] = est->HighPal[x/8 + (y/8)*16]; screen += 160; for (y = 0; y < 8*4; y++) for (x = 0; x < 8*16; x++) - screen[x + y*stride] = HighPal[(x/8 + (y/8)*16) | 0x40]; + screen[x + y*stride] = est->HighPal[(x/8 + (y/8)*16) | 0x40]; screen += stride*48; for (y = 0; y < 8*4; y++) for (x = 0; x < 8*16; x++) - screen[x + y*stride] = HighPal[(x/8 + (y/8)*16) | 0x80]; + screen[x + y*stride] = est->HighPal[(x/8 + (y/8)*16) | 0x80]; } #if defined(DRAW2_OVERRIDE_LINE_WIDTH) @@ -233,6 +244,7 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) struct PicoVideo *pvid=&Pico.video; int 
table=0,u,link=0,*sprite=0,*fsprite,oldsprite[2]; int x,y,max_sprites = 80, oldcol, oldreg; + unsigned char olddbg; if (!(pvid->reg[12]&1)) max_sprites = 64; @@ -243,41 +255,42 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) for (u=0; u < max_sprites && u <= which; u++) { - sprite=(int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(int *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite link=(sprite[0]>>16)&0x7f; if (!link) break; // End of sprites } if (u >= max_sprites) return; - fsprite = (int *)(Pico.vram+(table&0x7ffc)); + fsprite = (int *)(PicoMem.vram+(table&0x7ffc)); oldsprite[0] = fsprite[0]; oldsprite[1] = fsprite[1]; fsprite[0] = (sprite[0] & ~0x007f01ff) | 0x000080; fsprite[1] = (sprite[1] & ~0x01ff8000) | 0x800000; oldreg = pvid->reg[7]; - oldcol = Pico.cram[0]; + oldcol = PicoMem.cram[0]; + olddbg = pvid->debug_p; pvid->reg[7] = 0; - Pico.cram[0] = 0; - PicoDrawMask = PDRAW_SPRITES_LOW_ON; + PicoMem.cram[0] = 0; + pvid->debug_p = PVD_KILL_A | PVD_KILL_B; PicoFrameFull(); for (y = 0; y < 8*4; y++) { - unsigned char *ps = PicoDraw2FB + DRAW2_LINE_WIDTH*y + 8; + unsigned char *ps = Pico.est.Draw2FB + DRAW2_LINE_WIDTH*y + 8; for (x = 0; x < 8*4; x++) - if (ps[x]) screen[x] = HighPal[ps[x]], ps[x] = 0; + if (ps[x]) screen[x] = Pico.est.HighPal[ps[x]], ps[x] = 0; screen += stride; } fsprite[0] = oldsprite[0]; fsprite[1] = oldsprite[1]; pvid->reg[7] = oldreg; - Pico.cram[0] = oldcol; - PicoDrawMask = -1; + PicoMem.cram[0] = oldcol; + pvid->debug_p = olddbg; } -#define dump_ram(ram,fname) \ +#define dump_ram_m(ram,fname,mode) \ { \ unsigned short *sram = (unsigned short *) ram; \ FILE *f; \ @@ -285,7 +298,7 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) \ for (i = 0; i < sizeof(ram)/2; i++) \ sram[i] = (sram[i]<<8) | (sram[i]>>8); \ - f = fopen(fname, "wb"); \ + f = fopen(fname, mode); \ if (f) { \ fwrite(ram, 1, sizeof(ram), f); \ fclose(f); \ @@ -294,6 +307,9 @@ void 
PDebugShowSprite(unsigned short *screen, int stride, int which) sram[i] = (sram[i]<<8) | (sram[i]>>8); \ } +#define dump_ram(ram,fname) \ + dump_ram_m(ram,fname,"wb") + #define dump_ram_noswab(ram,fname) \ { \ FILE *f; \ @@ -306,21 +322,34 @@ void PDebugShowSprite(unsigned short *screen, int stride, int which) void PDebugDumpMem(void) { - dump_ram_noswab(Pico.zram, "dumps/zram.bin"); - dump_ram(Pico.cram, "dumps/cram.bin"); +#if 0 + char buf[1 << M68K_MEM_SHIFT]; + unsigned int a; + for (a = 0; ; a++) { + uptr v = m68k_read16_map[a]; + if (map_flag_set(v)) + break; + v <<= 1; + v += a << M68K_MEM_SHIFT; + memcpy(buf, (void *)v, sizeof(buf)); + dump_ram_m(buf, "dumps/cart.bin", a ? "ab" : "wb"); + } +#endif + dump_ram_noswab(PicoMem.zram, "dumps/zram.bin"); + dump_ram(PicoMem.cram, "dumps/cram.bin"); - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) { - dump_ram_noswab(Pico.vramb, "dumps/vram.bin"); + dump_ram_noswab(PicoMem.vramb, "dumps/vram.bin"); } else { - dump_ram(Pico.ram, "dumps/ram.bin"); - dump_ram(Pico.vram, "dumps/vram.bin"); - dump_ram(Pico.vsram,"dumps/vsram.bin"); + dump_ram(PicoMem.ram, "dumps/ram.bin"); + dump_ram(PicoMem.vram, "dumps/vram.bin"); + dump_ram(PicoMem.vsram,"dumps/vsram.bin"); } - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) { dump_ram(Pico_mcd->prg_ram, "dumps/prg_ram.bin"); if (Pico_mcd->s68k_regs[3]&4) // 1M mode? 
@@ -336,7 +365,7 @@ void PDebugDumpMem(void) } #ifndef NO_32X - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) { dump_ram(Pico32xMem->sdram, "dumps/sdram.bin"); dump_ram(Pico32xMem->dram[0], "dumps/dram0.bin"); @@ -350,50 +379,40 @@ void PDebugDumpMem(void) void PDebugZ80Frame(void) { - int lines, line_sample; + int lines; - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) return; - if (Pico.m.pal) { - lines = 312; - line_sample = 68; - } else { + if (Pico.m.pal) + lines = 313; + else lines = 262; - line_sample = 93; - } - z80_resetCycles(); - emustatus &= ~1; + PsndStartFrame(); - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) - PicoSyncZ80(line_sample*488); - if (ym2612.dacen && PsndDacLine <= line_sample) - PsndDoDAC(line_sample); - if (PsndOut) - PsndGetSamples(line_sample); - - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) { - PicoSyncZ80(224*488); + if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) { + PicoSyncZ80(Pico.t.m68c_cnt + 224 * 488); z80_int(); } - if (ym2612.dacen && PsndDacLine <= 224) - PsndDoDAC(224); - if (PsndOut) - PsndGetSamples(224); // sync z80 - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) - PicoSyncZ80(Pico.m.pal ? 151809 : 127671); // cycles adjusted for converter - if (PsndOut && ym2612.dacen && PsndDacLine <= lines-1) - PsndDoDAC(lines-1); + if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) { + Pico.t.m68c_cnt += Pico.m.pal ? 
151809 : 127671; // cycles adjusted for converter + PicoSyncZ80(Pico.t.m68c_cnt); + } - timers_cycle(); + if (PicoIn.sndOut) + PsndGetSamples(lines); + + timers_cycle(Pico.t.z80c_aim); + z80_resetCycles(); + Pico.t.m68c_aim = Pico.t.m68c_cnt; } void PDebugCPUStep(void) { - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) z80_run_nr(1); else SekStepM68k(); diff --git a/pico/debugCPU.c b/pico/debugCPU.c index fd312142..caf75f2e 100644 --- a/pico/debugCPU.c +++ b/pico/debugCPU.c @@ -8,7 +8,6 @@ #include "pico_int.h" -typedef unsigned char u8; static unsigned int pppc, ops=0; extern unsigned int lastread_a, lastread_d[16], lastwrite_cyc_d[16], lastwrite_mus_d[16]; @@ -30,6 +29,7 @@ static struct Cyclone *currentC68k = NULL; #define other_is_stopped() (currentC68k->state_flags&1) #define other_is_tracing() ((currentC68k->state_flags&2)?1:0) #elif defined(EMU_F68K) +static struct M68K_CONTEXT *g_m68kcontext; #define other_set_sub(s) g_m68kcontext=(s)?&PicoCpuFS68k:&PicoCpuFM68k; #define other_get_sr() g_m68kcontext->sr #define other_dar(i) ((unsigned int*)g_m68kcontext->dreg)[i] @@ -49,7 +49,7 @@ static int otherRun(void) CycloneRun(currentC68k); return 1-currentC68k->cycles; #elif defined(EMU_F68K) - return fm68k_emulate(1, 0); + return fm68k_emulate(g_m68kcontext, 1, 0); #endif } diff --git a/pico/draw.c b/pico/draw.c index a03d8873..387f3a42 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -2,6 +2,7 @@ * line renderer * (c) Copyright Dave, 2004 * (C) notaz, 2006-2010 + * (C) irixxxx, 2019-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -13,74 +14,120 @@ * - "sonic mode" for midline palette changes (8bit mode only) * - accurate sprites (AS) [+ s/h] * - * AS and s/h both use upper bits for both priority and shadow/hilight flags. + * s/h uses upper bits for both priority and shadow/hilight flags. * "sonic mode" is autodetected, shadow/hilight is enabled by emulated game. 
* AS is enabled by user and takes priority over "sonic mode". * * since renderer always draws line in 8bit mode, there are 2 spare bits: - * b \ mode: s/h as sonic - * 00 normal - pal index - * 01 shadow - pal index - * 10 hilight+op spr spr pal index - * 11 shadow +op spr - pal index + * b \ mode: s/h sonic + * 00 normal pal index + * 01 hilight pal index + * 10 shadow pal index + * 11 hilight|shadow=normal pal index + * + * sprite s/h can only be correctly done after the plane rendering s/h state is + * known since the s/h result changes if there's at least one high prio plane. + * sprite op rendering is deferred until this is known, and hilight is used as + * mark since it can't occur before sprite ops: + * x1 op marker pal index + * + * low prio s/h rendering: + * - plane and non-op sprite pixels have shadow + * - s/h sprite op pixel rendering is marked with hilight (deferred) + * high prio s/h rendering: + * - plane and non-op sprite pixels are normal + * - all s/h sprite op pixels (either marked or high prio) are rendered * * not handled properly: - * - hilight op on shadow tile - * - AS + s/h (s/h sprite flag interferes with and cleared by AS code) + * - high prio s/h sprite op overlapping low prio sprite shows sprite, not A,B,G + * - in debug sprite-masked, transparent high-prio sprite px don't remove shadow */ #include "pico_int.h" +#include + +#define FORCE // layer forcing via debug register? 
int (*PicoScanBegin)(unsigned int num) = NULL; int (*PicoScanEnd) (unsigned int num) = NULL; static unsigned char DefHighCol[8+320+8]; -unsigned char *HighCol = DefHighCol; -static unsigned char *HighColBase = DefHighCol; -static int HighColIncrement; +unsigned char *HighColBase = DefHighCol; +int HighColIncrement; -static unsigned int DefOutBuff[320*2/2]; -void *DrawLineDest = DefOutBuff; // pointer to dest buffer where to draw this line to +static u16 DefOutBuff[320*2] ALIGNED(4); void *DrawLineDestBase = DefOutBuff; int DrawLineDestIncrement; -static int HighCacheA[41+1]; // caches for high layers -static int HighCacheB[41+1]; -int HighPreSpr[80*2+1]; // slightly preprocessed sprites +static u32 HighCacheA[41*2+1]; // caches for high layers +static u32 HighCacheB[41*2+1]; +static s32 HighPreSpr[128*2*2]; // slightly preprocessed sprites (2 banks a 128) +static int HighPreSprBank; + +u32 VdpSATCache[2*128]; // VDP sprite cache (1st 32 sprite attr bits) + +// NB don't change any defines without checking their usage in ASM + +#if defined(USE_BGR555) +#define PXCONV(t) ((t & 0x000e000e)<< 1) | ((t & 0x00e000e0)<<2) | ((t & 0x0e000e00)<<3) +#define PXMASKL 0x04210421 // 0x0c630c63, LSB for all colours +#define PXMASKH 0x39ce39ce // 0x3def3def, all but MSB for all colours +#elif defined(USE_BGR565) +#define PXCONV(t) ((t & 0x000e000e)<< 1) | ((t & 0x00e000e0)<<3) | ((t & 0x0e000e00)<<4) +#define PXMASKL 0x08610861 // 0x18e318e3 +#define PXMASKH 0x738e738e // 0x7bef7bef +#else // RGB565 +#define PXCONV(t) ((t & 0x000e000e)<<12) | ((t & 0x00e000e0)<<3) | ((t & 0x0e000e00)>>7) +#define PXMASKL 0x08610861 // 0x18e318e3 +#define PXMASKH 0x738e738e // 0x7bef7bef +#endif + +#define LF_PLANE (1 << 0) // must be = 1 +#define LF_SH (1 << 1) // must be = 2 +//#define LF_FORCE (1 << 2) + +#define LF_PLANE_A 0 +#define LF_PLANE_B 1 #define SPRL_HAVE_HI 0x80 // have hi priority sprites #define SPRL_HAVE_LO 0x40 // *lo* #define SPRL_MAY_HAVE_OP 0x20 // may have operator sprites on 
the line #define SPRL_LO_ABOVE_HI 0x10 // low priority sprites may be on top of hi -unsigned char HighLnSpr[240][3 + MAX_LINE_SPRITES]; // sprite_count, ^flags, tile_count, [spritep]... +#define SPRL_HAVE_X 0x08 // have sprites with x != 0 +#define SPRL_TILE_OVFL 0x04 // tile limit exceeded on previous line +#define SPRL_HAVE_MASK0 0x02 // have sprite with x == 0 in 1st slot +#define SPRL_MASKED 0x01 // lo prio masking by sprite with x == 0 active -int rendstatus, rendstatus_old; +// sprite cache. stores results of sprite parsing for each display line: +// [visible_sprites_count, sprl_flags, tile_count, sprites_processed, sprite_idx[sprite_count], last_width] +unsigned char HighLnSpr[240][4+MAX_LINE_SPRITES+1]; + +int rendstatus_old; int rendlines; -int DrawScanline; -int PicoDrawMask = -1; static int skip_next_line=0; -//unsigned short ppt[] = { 0x0f11, 0x0ff1, 0x01f1, 0x011f, 0x01ff, 0x0f1f, 0x0f0e, 0x0e7c }; - struct TileStrip { int nametab; // Position in VRAM of name table (for this tile line) int line; // Line number in pixels 0x000-0x3ff within the virtual tilemap int hscroll; // Horizontal scroll value in pixels for the line int xmask; // X-Mask (0x1f - 0x7f) for horizontal wraparound in the tilemap - int *hc; // cache for high tile codes and their positions + u32 *hc; // cache for high tile codes and their positions int cells; // cells (tiles) to draw (32 col mode doesn't need to update whole 320) }; // stuff available in asm: #ifdef _ASM_DRAW_C -void DrawWindow(int tstart, int tend, int prio, int sh); -void DrawAllSprites(unsigned char *sprited, int prio, int sh); -void DrawTilesFromCache(int *hc, int sh, int rlim); -void DrawSpritesSHi(unsigned char *sprited); -void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells); -void FinalizeLineBGR444(int sh, int line); +void DrawWindow(int tstart, int tend, int prio, int sh, + struct PicoEState *est); +void DrawAllSprites(unsigned char *sprited, int prio, int sh, + struct PicoEState *est); +void 
DrawTilesFromCache(u32 *hc, int sh, int rlim, + struct PicoEState *est); +void DrawSpritesSHi(unsigned char *sprited, struct PicoEState *est); +void DrawLayer(int plane_sh, u32 *hcache, int cellskip, int maxcells, + struct PicoEState *est); void *blockcpy(void *dst, const void *src, size_t n); void blockcpy_or(void *dst, void *src, size_t n, int pat); #else @@ -88,202 +135,305 @@ void blockcpy_or(void *dst, void *src, size_t n, int pat); void blockcpy_or(void *dst, void *src, size_t n, int pat) { unsigned char *pd = dst, *ps = src; - for (; n; n--) - *pd++ = (unsigned char) (*ps++ | pat); + if (dst > src) { + for (pd += n, ps += n; n; n--) + *--pd = (unsigned char) (*--ps | pat); + } else + for (; n; n--) + *pd++ = (unsigned char) (*ps++ | pat); } -#define blockcpy memcpy +#define blockcpy memmove #endif - -#define TileNormMaker(funcname,pix_func) \ -static int funcname(int sx,int addr,int pal) \ +#define TileNormMaker_(pix_func,ret) \ { \ - unsigned char *pd = HighCol+sx; \ - unsigned int pack=0; unsigned int t=0; \ + unsigned char t; \ \ - pack=*(unsigned int *)(Pico.vram+addr); /* Get 8 pixels */ \ - if (pack) \ - { \ - t=(pack&0x0000f000)>>12; pix_func(0); \ - t=(pack&0x00000f00)>> 8; pix_func(1); \ - t=(pack&0x000000f0)>> 4; pix_func(2); \ - t=(pack&0x0000000f) ; pix_func(3); \ - t=(pack&0xf0000000)>>28; pix_func(4); \ - t=(pack&0x0f000000)>>24; pix_func(5); \ - t=(pack&0x00f00000)>>20; pix_func(6); \ - t=(pack&0x000f0000)>>16; pix_func(7); \ - return 0; \ - } \ - \ - return 1; /* Tile blank */ \ + t = (pack&0x0000f000)>>12; pix_func(0); \ + t = (pack&0x00000f00)>> 8; pix_func(1); \ + t = (pack&0x000000f0)>> 4; pix_func(2); \ + t = (pack&0x0000000f) ; pix_func(3); \ + t = (pack&0xf0000000)>>28; pix_func(4); \ + t = (pack&0x0f000000)>>24; pix_func(5); \ + t = (pack&0x00f00000)>>20; pix_func(6); \ + t = (pack&0x000f0000)>>16; pix_func(7); \ + return ret; \ } - -#define TileFlipMaker(funcname,pix_func) \ -static int funcname(int sx,int addr,int pal) \ +#define 
TileFlipMaker_(pix_func,ret) \ { \ - unsigned char *pd = HighCol+sx; \ - unsigned int pack=0; unsigned int t=0; \ + unsigned char t; \ \ - pack=*(unsigned int *)(Pico.vram+addr); /* Get 8 pixels */ \ - if (pack) \ - { \ - t=(pack&0x000f0000)>>16; pix_func(0); \ - t=(pack&0x00f00000)>>20; pix_func(1); \ - t=(pack&0x0f000000)>>24; pix_func(2); \ - t=(pack&0xf0000000)>>28; pix_func(3); \ - t=(pack&0x0000000f) ; pix_func(4); \ - t=(pack&0x000000f0)>> 4; pix_func(5); \ - t=(pack&0x00000f00)>> 8; pix_func(6); \ - t=(pack&0x0000f000)>>12; pix_func(7); \ - return 0; \ - } \ - \ - return 1; /* Tile blank */ \ + t = (pack&0x000f0000)>>16; pix_func(0); \ + t = (pack&0x00f00000)>>20; pix_func(1); \ + t = (pack&0x0f000000)>>24; pix_func(2); \ + t = (pack&0xf0000000)>>28; pix_func(3); \ + t = (pack&0x0000000f) ; pix_func(4); \ + t = (pack&0x000000f0)>> 4; pix_func(5); \ + t = (pack&0x00000f00)>> 8; pix_func(6); \ + t = (pack&0x0000f000)>>12; pix_func(7); \ + return ret; \ } +#define TileNormMaker(funcname, pix_func) \ +static void funcname(unsigned char *pd, unsigned int pack, unsigned char pal) \ +TileNormMaker_(pix_func,) -#ifdef _ASM_DRAW_C_AMIPS -int TileNorm(int sx,int addr,int pal); -int TileFlip(int sx,int addr,int pal); -#else +#define TileFlipMaker(funcname, pix_func) \ +static void funcname(unsigned char *pd, unsigned int pack, unsigned char pal) \ +TileFlipMaker_(pix_func,) +#define TileNormMakerAS(funcname, pix_func) \ +static unsigned funcname(unsigned m, unsigned char *pd, unsigned int pack, unsigned char pal) \ +TileNormMaker_(pix_func,m) + +#define TileFlipMakerAS(funcname, pix_func) \ +static unsigned funcname(unsigned m, unsigned char *pd, unsigned int pack, unsigned char pal) \ +TileFlipMaker_(pix_func,m) + +// draw layer or non-s/h sprite pixels (no operator colors) #define pix_just_write(x) \ - if (t) pd[x]=pal|t + if (likely(t)) pd[x]=pal|t -TileNormMaker(TileNorm,pix_just_write) -TileFlipMaker(TileFlip,pix_just_write) - -#endif +TileNormMaker(TileNorm, 
pix_just_write) +TileFlipMaker(TileFlip, pix_just_write) #ifndef _ASM_DRAW_C -// draw a sprite pixel, process operator colors +// draw low prio sprite non-s/h pixels in s/h mode +#define pix_nonsh(x) \ + if (likely(t)) { \ + pd[x]=pal|t; \ + if (unlikely(t==0xe)) pd[x]&=~0x80; /* disable shadow for color 14 (hw bug?) */ \ + } + +TileNormMaker(TileNormNonSH, pix_nonsh) +TileFlipMaker(TileFlipNonSH, pix_nonsh) + +// draw sprite pixels, process operator colors #define pix_sh(x) \ - if (!t); \ - else if (t>=0xe) pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ - else pd[x]=pal|t + if (likely(t)) \ + pd[x]=(likely(t<0xe) ? pal|t : pd[x]|((t-1)<<6)) TileNormMaker(TileNormSH, pix_sh) TileFlipMaker(TileFlipSH, pix_sh) -// draw a sprite pixel, mark operator colors +// draw sprite pixels, mark but don't process operator colors #define pix_sh_markop(x) \ - if (!t); \ - else if (t>=0xe) pd[x]|=0x80; \ - else pd[x]=pal|t + if (likely(t)) \ + pd[x]=(likely(t<0xe) ? pal|t : pd[x]|0x40) TileNormMaker(TileNormSH_markop, pix_sh_markop) TileFlipMaker(TileFlipSH_markop, pix_sh_markop) -// process operator pixels only, apply only on low pri tiles and other op pixels +#endif + +// draw low prio sprite operator pixels if visible (i.e. 
marked) #define pix_sh_onlyop(x) \ - if (t>=0xe && (pd[x]&0xc0)) \ - pd[x]=(pd[x]&0x3f)|(t<<6); /* c0 shadow, 80 hilight */ \ + if (unlikely(t>=0xe && (pd[x]&0x40))) \ + pd[x]=(pd[x]&~0x40)|((t-1)<<6) + +#ifndef _ASM_DRAW_C TileNormMaker(TileNormSH_onlyop_lp, pix_sh_onlyop) TileFlipMaker(TileFlipSH_onlyop_lp, pix_sh_onlyop) #endif -// draw a sprite pixel (AS) +// AS: sprite mask bits in m shifted to bits 8-15, see DrawSpritesHiAS + +// draw high prio sprite pixels (AS) #define pix_as(x) \ - if (t && !(pd[x]&0x80)) pd[x]=pal|t + if (likely(t && (m & (1<<(x+8))))) \ + m &= ~(1<<(x+8)), pd[x] = pal|t -TileNormMaker(TileNormAS, pix_as) -TileFlipMaker(TileFlipAS, pix_as) +TileNormMakerAS(TileNormAS, pix_as) +TileFlipMakerAS(TileFlipAS, pix_as) -// draw a sprite pixel, skip operator colors (AS) -#define pix_sh_as_noop(x) \ - if (t && t < 0xe && !(pd[x]&0x80)) pd[x]=pal|t +// draw high prio sprite pixels, process operator colors (AS) +// NB sprite+planes: h+s->n, h+[nh]->h, s+[nhs]->s, hence mask h before op +#define pix_sh_as(x) \ + if (likely(t && (m & (1<<(x+8))))) { \ + m &= ~(1<<(x+8)); \ + pd[x]=(likely(t<0xe) ? 
pal|t : (pd[x]&~0x40)|((t-1)<<6)); \ + } -TileNormMaker(TileNormAS_noop, pix_sh_as_noop) -TileFlipMaker(TileFlipAS_noop, pix_sh_as_noop) +TileNormMakerAS(TileNormSH_AS, pix_sh_as) +TileFlipMakerAS(TileFlipSH_AS, pix_sh_as) -// mark pixel as sprite pixel (AS) +// draw only sprite operator pixels (AS) +#define pix_sh_as_onlyop(x) \ + if (likely(t && (m & (1<<(x+8))))) { \ + m &= ~(1<<(x+8)); \ + pix_sh_onlyop(x); \ + } + +TileNormMakerAS(TileNormSH_AS_onlyop_lp, pix_sh_as_onlyop) +TileFlipMakerAS(TileFlipSH_AS_onlyop_lp, pix_sh_as_onlyop) + +// mark low prio sprite pixels (AS) #define pix_sh_as_onlymark(x) \ - if (t) pd[x]|=0x80 + if (likely(t)) m &= ~(1<<(x+8)) -TileNormMaker(TileNormAS_onlymark, pix_sh_as_onlymark) -TileFlipMaker(TileFlipAS_onlymark, pix_sh_as_onlymark) +TileNormMakerAS(TileNormAS_onlymark, pix_sh_as_onlymark) +TileFlipMakerAS(TileFlipAS_onlymark, pix_sh_as_onlymark) +#ifdef FORCE +// NB s/h already resolved by non-forced drawing +// forced both layer draw (through debug reg) +#define pix_and(x) \ + pal |= 0xc0; /* leave s/h bits untouched in pixel "and" */ \ + pd[x] &= pal|t + +TileNormMaker(TileNorm_and, pix_and) +TileFlipMaker(TileFlip_and, pix_and) + +// forced sprite draw (through debug reg) +#define pix_sh_as_and(x) \ + pal |= 0xc0; /* leave s/h bits untouched in pixel "and" */ \ + if (likely(m & (1<<(x+8)))) { \ + m &= ~(1<<(x+8)); \ + /* if (!t) pd[x] |= 0x40; as per titan hw notes? 
*/ \ + pd[x] &= pal|t; \ + } + +TileNormMakerAS(TileNormSH_AS_and, pix_sh_as_and) +TileFlipMakerAS(TileFlipSH_AS_and, pix_sh_as_and) +#endif // -------------------------------------------- #ifndef _ASM_DRAW_C -static void DrawStrip(struct TileStrip *ts, int plane_sh, int cellskip) +#define DrawTile(mask) { \ + if (code!=oldcode) { \ + oldcode = code; \ + \ + pack = 0; \ + if (code != blank) { \ + /* Get tile address/2: */ \ + u32 addr = ((code&0x7ff)<<4) + ty; \ + if (code & 0x1000) addr ^= 0xe; /* Y-flip */ \ + \ + pal = ((code>>9)&0x30) | sh; /* shadow */ \ + \ + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); \ + if (!pack) \ + blank = code; \ + } \ + } \ + \ + if (code & 0x8000) { /* (un-forced) high priority tile */ \ + if (sh | (pack&mask)) { \ + code |= (dx<<16) | (ty<<25); \ + if (code & 0x1000) code ^= 0xe<<25; \ + *hc++ = code, *hc++ = pack&mask; /* cache it */ \ + } \ + } else if (pack&mask) { \ + if (code & 0x0800) TileFlip(pd + dx, pack&mask, pal); \ + else TileNorm(pd + dx, pack&mask, pal); \ + } \ +} + +static void DrawStrip(struct TileStrip *ts, int lflags, int cellskip) { - int tilex,dx,ty,code=0,addr=0,cells; - int oldcode=-1,blank=-1; // The tile we know is blank - int pal=0,sh; + unsigned char *pd = Pico.est.HighCol; + u32 *hc = ts->hc; + int tilex, dx, ty, cells; + u32 code, oldcode = -1, blank = -1; // The tile we know is blank + unsigned int pal = 0, pack = 0, sh, mask = ~0; // Draw tiles across screen: - sh=(plane_sh<<5)&0x40; + sh = (lflags & LF_SH) << 6; // shadow tilex=((-ts->hscroll)>>3)+cellskip; ty=(ts->line&7)<<1; // Y-Offset into tile dx=((ts->hscroll-1)&7)+1; cells = ts->cells - cellskip; - if(dx != 8) cells++; // have hscroll, need to draw 1 cell more dx+=cellskip<<3; - for (; cells > 0; dx+=8,tilex++,cells--) + if (dx & 7) { + code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; + mask = 0xffffffff<<((dx&7)*4); + if (code & 0x0800) mask = 0xffffffff>>((dx&7)*4); + mask = (~mask << 16) | (~mask >> 16); + + DrawTile(mask); + dx += 
8, tilex++, cells--; + } + +// int force = (lflags&LF_FORCE) << 13; + for (; cells > 0; dx+=8, tilex++, cells--) { - int zero=0; + code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; +// code &= ~force; // forced always draw everything - code=Pico.vram[ts->nametab+(tilex&ts->xmask)]; - if (code==blank) continue; - if (code>>15) { // high priority tile - int cval = code | (dx<<16) | (ty<<25); - if(code&0x1000) cval^=7<<26; - *ts->hc++ = cval; // cache it + if (code == blank && !((code & 0x8000) && sh)) continue; + + DrawTile(~0); + } + + if (dx & 7) { + code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; +// code &= ~force; // forced always draw everything + if (!(code == blank && !((code & 0x8000) && sh))) { + mask = 0xffffffff<<((dx&7)*4); + if (code & 0x0800) mask = 0xffffffff>>((dx&7)*4); + mask = (mask << 16) | (mask >> 16); + + DrawTile(mask); } - - if (code!=oldcode) { - oldcode = code; - // Get tile address/2: - addr=(code&0x7ff)<<4; - addr+=ty; - if (code&0x1000) addr^=0xe; // Y-flip - - pal=((code>>9)&0x30)|sh; - } - - if (code&0x0800) zero=TileFlip(dx,addr,pal); - else zero=TileNorm(dx,addr,pal); - - if (zero) blank=code; // We know this tile is blank now } // terminate the cache list - *ts->hc = 0; + *hc = 0; + // if oldcode wasn't changed, it means all layer is hi priority - if (oldcode == -1) rendstatus |= PDRAW_PLANE_HI_PRIO; + if (oldcode == -1) Pico.est.rendstatus |= PDRAW_PLANE_HI_PRIO; } // this is messy -void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) +static void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) { - int tilex,dx,code=0,addr=0,cell=0; - int oldcode=-1,blank=-1; // The tile we know is blank - int pal=0,scan=DrawScanline; + unsigned char *pd = Pico.est.HighCol; + u32 *hc = ts->hc; + int tilex, dx, ty = 0, addr = 0, cell = 0, nametabadd = 0; + u32 oldcode = -1, blank = -1; // The tile we know is blank + unsigned int pal = 0, scan = Pico.est.DrawScanline, sh, plane; // Draw tiles across 
screen: + sh = (plane_sh & LF_SH) << 6; // shadow + plane = (plane_sh & LF_PLANE); // plane to draw tilex=(-ts->hscroll)>>3; dx=((ts->hscroll-1)&7)+1; - if(dx != 8) cell--; // have hscroll, start with negative cell + if (ts->hscroll & 0x0f) { + int adj = ((ts->hscroll ^ dx) >> 3) & 1; + cell -= adj + 1; + ts->cells -= adj; + PicoMem.vsram[0x3e] = PicoMem.vsram[0x3f] = plane_sh >> 16; + } cell+=cellskip; tilex+=cellskip; dx+=cellskip<<3; +// int force = (plane_sh&LF_FORCE) << 13; + if ((cell&1)==1) + { + int line,vscroll; + vscroll = PicoMem.vsram[plane + (cell&0x3e)]; + + // Find the line in the name table + line=(vscroll+scan)&ts->line&0xffff; // ts->line is really ymask .. + nametabadd=(line>>3)<<(ts->line>>24); // .. and shift[width] + ty=(line&7)<<1; // Y-Offset into tile + } for (; cell < ts->cells; dx+=8,tilex++,cell++) { - int zero=0,nametabadd,ty; + u32 code, pack; - //if((cell&1)==0) + if ((cell&1)==0) { int line,vscroll; - vscroll=Pico.vsram[(plane_sh&1)+(cell&~1)]; + vscroll = PicoMem.vsram[plane + (cell&0x3e)]; // Find the line in the name table line=(vscroll+scan)&ts->line&0xffff; // ts->line is really ymask .. 
@@ -291,92 +441,136 @@ void DrawStripVSRam(struct TileStrip *ts, int plane_sh, int cellskip) ty=(line&7)<<1; // Y-Offset into tile } - code=Pico.vram[ts->nametab+nametabadd+(tilex&ts->xmask)]; - if (code==blank) continue; - if (code>>15) { // high priority tile - int cval = code | (dx<<16) | (ty<<25); - if(code&0x1000) cval^=7<<26; - *ts->hc++ = cval; // cache it + code= PicoMem.vram[ts->nametab + nametabadd + (tilex & ts->xmask)]; +// code &= ~force; // forced always draw everything + code |= ty<<25; // add ty since that can change pixel row for every 2nd tile + + if (code == blank && !((code & 0x8000) && sh)) continue; - } if (code!=oldcode) { oldcode = code; // Get tile address/2: - addr=(code&0x7ff)<<4; - if (code&0x1000) addr+=14-ty; else addr+=ty; // Y-flip + addr = (code&0x7ff)<<4; - pal=((code>>9)&0x30)|((plane_sh<<5)&0x40); + pal = ((code>>9)&0x30) | sh; // shadow } - if (code&0x0800) zero=TileFlip(dx,addr,pal); - else zero=TileNorm(dx,addr,pal); + pack = (code & 0x1000 ? ty^0xe : ty); // Y-flip + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr+pack)); + if (!pack) + blank = code; - if (zero) blank=code; // We know this tile is blank now + if (code & 0x8000) { // (un-forced) high priority tile + code |= (dx<<16); + if (code & 0x1000) code ^= 0xe<<25; + *hc++ = code, *hc++ = pack; // cache it + } else if (code != blank) { + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); + } } // terminate the cache list - *ts->hc = 0; - if (oldcode == -1) rendstatus |= PDRAW_PLANE_HI_PRIO; + *hc = 0; + + if (oldcode == -1) Pico.est.rendstatus |= PDRAW_PLANE_HI_PRIO; } #endif +#define DrawTileInterlace(mask) { \ + if (code!=oldcode) { \ + oldcode = code; \ + \ + pack = 0; \ + if (code != blank) { \ + /* Get tile address/2: */ \ + u32 addr = ((code&0x3ff)<<5) + ty; \ + if (code & 0x1000) addr ^= 0x1e; /* Y-flip */ \ + \ + pal = ((code>>9)&0x30) | sh; /* shadow */ \ + \ + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); \ + if (!pack) \ + blank 
= code; \ + } \ + } \ + \ + if (code & 0x8000) { /* high priority tile */ \ + if (sh | (pack&mask)) { \ + code = (code&0xfc00) | ((code&0x3ff)<<1) | (dx<<16) | (ty<<25); \ + if (code & 0x1000) code ^= 0x1e<<25; \ + *hc++ = code, *hc++ = pack&mask; /* cache it */ \ + } \ + } else if (pack&mask) { \ + if (code & 0x0800) TileFlip(pd + dx, pack&mask, pal); \ + else TileNorm(pd + dx, pack&mask, pal); \ + } \ +} + #ifndef _ASM_DRAW_C static #endif -void DrawStripInterlace(struct TileStrip *ts) +void DrawStripInterlace(struct TileStrip *ts, int plane_sh) { - int tilex=0,dx=0,ty=0,code=0,addr=0,cells; - int oldcode=-1,blank=-1; // The tile we know is blank - int pal=0; + unsigned char *pd = Pico.est.HighCol; + u32 *hc = ts->hc; + int tilex = 0, dx = 0, ty = 0, cells; + u32 code, oldcode = -1, blank = -1; // The tile we know is blank + unsigned int pal = 0, pack = 0, sh, mask = ~0; // Draw tiles across screen: + sh = (plane_sh & LF_SH) << 6; // shadow tilex=(-ts->hscroll)>>3; ty=(ts->line&15)<<1; // Y-Offset into tile dx=((ts->hscroll-1)&7)+1; cells = ts->cells; - if(dx != 8) cells++; // have hscroll, need to draw 1 cell more + if (dx & 7) { + code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; + mask = 0xffffffff<<(dx*4); + if (code & 0x0800) mask = 0xffffffff>>(dx*4); + mask = (~mask << 16) | (~mask >> 16); + + DrawTileInterlace(mask); + dx += 8, tilex++, cells--; + } + +// int force = (plane_sh&LF_FORCE) << 13; for (; cells; dx+=8,tilex++,cells--) { - int zero=0; + u32 code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; +// code &= ~force; // forced always draw everything - code=Pico.vram[ts->nametab+(tilex&ts->xmask)]; - if (code==blank) continue; - if (code>>15) { // high priority tile - int cval = (code&0xfc00) | (dx<<16) | (ty<<25); - cval|=(code&0x3ff)<<1; - if(code&0x1000) cval^=0xf<<26; - *ts->hc++ = cval; // cache it + if (code == blank && !(code & 0x8000)) continue; + + DrawTileInterlace(~0); + } + + if (dx & 7) { + code = PicoMem.vram[ts->nametab + 
(tilex & ts->xmask)]; +// code &= ~force; // forced always draw everything + if (!(code == blank && !((code & 0x8000) && sh))) { + mask = 0xffffffff<<((dx&7)*4); + if (code & 0x0800) mask = 0xffffffff>>((dx&7)*4); + mask = (mask << 16) | (mask >> 16); + + DrawTileInterlace(mask); } - - if (code!=oldcode) { - oldcode = code; - // Get tile address/2: - addr=(code&0x7ff)<<5; - if (code&0x1000) addr+=30-ty; else addr+=ty; // Y-flip - -// pal=Pico.cram+((code>>9)&0x30); - pal=((code>>9)&0x30); - } - - if (code&0x0800) zero=TileFlip(dx,addr,pal); - else zero=TileNorm(dx,addr,pal); - - if (zero) blank=code; // We know this tile is blank now } // terminate the cache list - *ts->hc = 0; + *hc = 0; } // -------------------------------------------- #ifndef _ASM_DRAW_C -static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells) +static void DrawLayer(int plane_sh, u32 *hcache, int cellskip, int maxcells, + struct PicoEState *est) { - struct PicoVideo *pvid=&Pico.video; + struct PicoVideo *pvid=&est->Pico->video; const char shift[4]={5,6,5,7}; // 32,64 or 128 sized tilemaps (2 is invalid) struct TileStrip ts; int width, height, ymask; @@ -393,40 +587,50 @@ static void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells) ts.xmask=(1<1) ymask =0x0ff; + switch (width) { + case 1: ymask &= 0x1ff; break; + case 2: ymask = 0x007; break; + case 3: ymask = 0x0ff; break; + } // Find name table: - if (plane_sh&1) ts.nametab=(pvid->reg[4]&0x07)<<12; // B - else ts.nametab=(pvid->reg[2]&0x38)<< 9; // A + if (plane_sh&LF_PLANE) ts.nametab=(pvid->reg[4]&0x07)<<12; // B + else ts.nametab=(pvid->reg[2]&0x38)<< 9; // A htab=pvid->reg[13]<<9; // Horizontal scroll table address - if ( pvid->reg[11]&2) htab+=DrawScanline<<1; // Offset by line - if ((pvid->reg[11]&1)==0) htab&=~0xf; // Offset by tile - htab+=plane_sh&1; // A or B + switch (pvid->reg[11]&3) { + case 1: htab += (est->DrawScanline<<1) & 0x0f; break; + case 2: htab += (est->DrawScanline<<1) & ~0x0f; break; 
// Offset by tile + case 3: htab += (est->DrawScanline<<1); break; // Offset by line + } + htab+=plane_sh&LF_PLANE; // A or B // Get horizontal scroll value, will be masked later - ts.hscroll=Pico.vram[htab&0x7fff]; + ts.hscroll = PicoMem.vram[htab & 0x7fff]; if((pvid->reg[12]&6) == 6) { // interlace mode 2 - vscroll=Pico.vsram[plane_sh&1]; // Get vertical scroll value + vscroll = PicoMem.vsram[plane_sh&LF_PLANE]; // Get vertical scroll value // Find the line in the name table - ts.line=(vscroll+(DrawScanline<<1))&((ymask<<1)|1); + ts.line=(vscroll+(est->DrawScanline<<1))&((ymask<<1)|1); ts.nametab+=(ts.line>>4)<reg[11]&4) { + DrawStripInterlace(&ts, plane_sh); + } else if (pvid->reg[11]&4) { // shit, we have 2-cell column based vscroll // luckily this doesn't happen too often ts.line=ymask|(shift[width]<<24); // save some stuff instead of line + // vscroll value for leftmost cells in case of hscroll not on 16px boundary + // XXX it's unclear what exactly the hw is doing. Continue reading where it + // stopped last seems to work best (H40: 0x50 (wrap->0x00), H32 0x40). 
+ plane_sh |= PicoMem.vsram[(pvid->reg[12]&1?0x00:0x20) + (plane_sh&LF_PLANE)] << 16; DrawStripVSRam(&ts, plane_sh, cellskip); } else { - vscroll=Pico.vsram[plane_sh&1]; // Get vertical scroll value + vscroll = PicoMem.vsram[plane_sh&LF_PLANE]; // Get vertical scroll value // Find the line in the name table - ts.line=(vscroll+DrawScanline)&ymask; + ts.line=(vscroll+est->DrawScanline)&ymask; ts.nametab+=(ts.line>>3)<HighCol; + struct PicoVideo *pvid = &est->Pico->video; int tilex,ty,nametab,code=0; int blank=-1; // The tile we know is blank @@ -447,190 +653,183 @@ static void DrawWindow(int tstart, int tend, int prio, int sh) // int *hcache if (pvid->reg[12]&1) { nametab=(pvid->reg[3]&0x3c)<<9; // 40-cell mode - nametab+=(DrawScanline>>3)<<6; + nametab+=(est->DrawScanline>>3)<<6; } else { nametab=(pvid->reg[3]&0x3e)<<9; // 32-cell mode - nametab+=(DrawScanline>>3)<<5; + nametab+=(est->DrawScanline>>3)<<5; } tilex=tstart<<1; - if (!(rendstatus & PDRAW_WND_DIFF_PRIO)) { - // check the first tile code - code=Pico.vram[nametab+tilex]; - // if the whole window uses same priority (what is often the case), we may be able to skip this field - if ((code>>15) != prio) return; + if (prio && !(est->rendstatus & PDRAW_WND_DIFF_PRIO)) { + // all tiles processed in low prio pass + return; } tend<<=1; - ty=(DrawScanline&7)<<1; // Y-Offset into tile + ty=(est->DrawScanline&7)<<1; // Y-Offset into tile // Draw tiles across screen: if (!sh) { for (; tilex < tend; tilex++) { - int addr=0,zero=0; + unsigned int pack; + int dx, addr; int pal; - code=Pico.vram[nametab+tilex]; - if (code==blank) continue; + code = PicoMem.vram[nametab + tilex]; if ((code>>15) != prio) { - rendstatus |= PDRAW_WND_DIFF_PRIO; + est->rendstatus |= PDRAW_WND_DIFF_PRIO; continue; } - - pal=((code>>9)&0x30); + if (code==blank) continue; // Get tile address/2: addr=(code&0x7ff)<<4; if (code&0x1000) addr+=14-ty; else addr+=ty; // Y-flip - if (code&0x0800) zero=TileFlip(8+(tilex<<3),addr,pal); - else 
zero=TileNorm(8+(tilex<<3),addr,pal); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); + if (!pack) { + blank = code; + continue; + } - if (zero) blank=code; // We know this tile is blank now + pal = ((code >> 9) & 0x30); + dx = 8 + (tilex << 3); + + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } } else { for (; tilex < tend; tilex++) { - int addr=0,zero=0; + unsigned int pack; + int dx, addr; int pal; - code=Pico.vram[nametab+tilex]; - if(code==blank) continue; + code = PicoMem.vram[nametab + tilex]; if((code>>15) != prio) { - rendstatus |= PDRAW_WND_DIFF_PRIO; + est->rendstatus |= PDRAW_WND_DIFF_PRIO; continue; } pal=((code>>9)&0x30); if (prio) { - int *zb = (int *)(HighCol+8+(tilex<<3)); - *zb++ &= 0xbfbfbfbf; - *zb &= 0xbfbfbfbf; + int *zb = (int *)(est->HighCol+8+(tilex<<3)); + *zb++ &= 0x7f7f7f7f; + *zb &= 0x7f7f7f7f; } else { - pal |= 0x40; + pal |= 0x80; } + if(code==blank) continue; // Get tile address/2: addr=(code&0x7ff)<<4; if (code&0x1000) addr+=14-ty; else addr+=ty; // Y-flip - if (code&0x0800) zero=TileFlip(8+(tilex<<3),addr,pal); - else zero=TileNorm(8+(tilex<<3),addr,pal); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); + if (!pack) { + blank = code; + continue; + } - if (zero) blank=code; // We know this tile is blank now + dx = 8 + (tilex << 3); + + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } } } // -------------------------------------------- -static void DrawTilesFromCacheShPrep(void) +static void DrawTilesFromCache(u32 *hc, int sh, int rlim, struct PicoEState *est) { - // as some layer has covered whole line with hi priority tiles, - // we can process whole line and then act as if sh/hi mode was off, - // but leave lo pri op sprite markers alone - int c = 320/4, *zb = (int *)(HighCol+8); - rendstatus |= PDRAW_SHHI_DONE; - while (c--) - { - *zb++ &= 0xbfbfbfbf; - } -} - -static void DrawTilesFromCache(int *hc, int sh, int rlim) -{ - int code, addr, dx; + 
unsigned char *pd = est->HighCol; + u32 code, dx; + u32 pack; int pal; // *ts->hc++ = code | (dx<<16) | (ty<<25); // cache it - if (sh && (rendstatus & (PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO))) + if (sh && (est->rendstatus & (PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO))) { - if (!(rendstatus & PDRAW_SHHI_DONE)) - DrawTilesFromCacheShPrep(); + if (!(est->rendstatus & PDRAW_SHHI_DONE)) { + // as some layer has covered whole line with hi priority tiles, + // we can process whole line and then act as if sh/hi mode was off, + // but leave lo pri op sprite markers alone + int *zb = (int *)(Pico.est.HighCol+8); + int c = rlim / 4; + while (c--) + { + *zb++ &= 0x7f7f7f7f; + } + Pico.est.rendstatus |= PDRAW_SHHI_DONE; + } sh = 0; } if (!sh) { - short blank=-1; // The tile we know is blank while ((code=*hc++)) { - int zero; - if((short)code == blank) continue; - // Get tile address/2: - addr=(code&0x7ff)<<4; - addr+=(unsigned int)code>>25; // y offset into tile - dx=(code>>16)&0x1ff; + pack = *hc++; + if (rlim-dx < 0) + goto last_cut_tile; + if (!pack) + continue; - pal=((code>>9)&0x30); - if (rlim-dx < 0) goto last_cut_tile; + dx = (code >> 16) & 0x1ff; + pal = ((code >> 9) & 0x30); - if (code&0x0800) zero=TileFlip(dx,addr,pal); - else zero=TileNorm(dx,addr,pal); - - if (zero) blank=(short)code; + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } } else { while ((code=*hc++)) { unsigned char *zb; - // Get tile address/2: - addr=(code&0x7ff)<<4; - addr+=(unsigned int)code>>25; // y offset into tile - dx=(code>>16)&0x1ff; - zb = HighCol+dx; - *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; - *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; *zb++ &= 0xbf; - pal=((code>>9)&0x30); - if (rlim-dx < 0) goto last_cut_tile; + dx = (code >> 16) & 0x1ff; + zb = est->HighCol+dx; + *zb++ &= 0x7f; *zb++ &= 0x7f; *zb++ &= 0x7f; *zb++ &= 0x7f; + *zb++ &= 0x7f; *zb++ &= 0x7f; *zb++ &= 0x7f; *zb++ &= 0x7f; - if (code&0x0800) TileFlip(dx,addr,pal); - else 
TileNorm(dx,addr,pal); + pack = *hc++; + if (rlim - dx < 0) + goto last_cut_tile; + if (!pack) + continue; + + pal = ((code >> 9) & 0x30); + + if (code & 0x0800) TileFlip(pd + dx, pack, pal); + else TileNorm(pd + dx, pack, pal); } } return; last_cut_tile: + // for vertical window cutoff { - unsigned int t, pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - unsigned char *pd = HighCol+dx; - if (!pack) return; - if (code&0x0800) - { - switch (rlim-dx+8) - { - case 7: t=pack&0x00000f00; if (t) pd[6]=(unsigned char)(pal|(t>> 8)); // "break" is left out intentionally - case 6: t=pack&0x000000f0; if (t) pd[5]=(unsigned char)(pal|(t>> 4)); - case 5: t=pack&0x0000000f; if (t) pd[4]=(unsigned char)(pal|(t )); - case 4: t=pack&0xf0000000; if (t) pd[3]=(unsigned char)(pal|(t>>28)); - case 3: t=pack&0x0f000000; if (t) pd[2]=(unsigned char)(pal|(t>>24)); - case 2: t=pack&0x00f00000; if (t) pd[1]=(unsigned char)(pal|(t>>20)); - case 1: t=pack&0x000f0000; if (t) pd[0]=(unsigned char)(pal|(t>>16)); - default: break; - } - } - else - { - switch (rlim-dx+8) - { - case 7: t=pack&0x00f00000; if (t) pd[6]=(unsigned char)(pal|(t>>20)); - case 6: t=pack&0x0f000000; if (t) pd[5]=(unsigned char)(pal|(t>>24)); - case 5: t=pack&0xf0000000; if (t) pd[4]=(unsigned char)(pal|(t>>28)); - case 4: t=pack&0x0000000f; if (t) pd[3]=(unsigned char)(pal|(t )); - case 3: t=pack&0x000000f0; if (t) pd[2]=(unsigned char)(pal|(t>> 4)); - case 2: t=pack&0x00000f00; if (t) pd[1]=(unsigned char)(pal|(t>> 8)); - case 1: t=pack&0x0000f000; if (t) pd[0]=(unsigned char)(pal|(t>>12)); - default: break; + unsigned int t, mask; + + // rlim-dx + 8 px to draw -> mask shift 8-(rlim-dx + 8) + t = -(rlim - dx); + if (t < 8) { + mask = 0xffffffff<<(t*4); + if (code & 0x0800) mask = 0xffffffff>>(t*4); + mask = (mask << 16) | (mask >> 16); + + if (pack&mask) { + if (code & 0x0800) TileFlip(pd + dx, pack&mask, pal); + else TileNorm(pd + dx, pack&mask, pal); } } } @@ -641,14 +840,16 @@ last_cut_tile: // Index + 0 : 
hhhhvvvv ab--hhvv yyyyyyyy yyyyyyyy // a: offscreen h, b: offs. v, h: horiz. size // Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -static void DrawSprite(int *sprite, int sh) +static void DrawSprite(s32 *sprite, int sh, int w) { + void (*fTileFunc)(unsigned char *pd, unsigned int pack, unsigned char pal); + unsigned char *pd = Pico.est.HighCol; int width=0,height=0; - int row=0,code=0; + int row=0; + s32 code=0; int pal; int tile=0,delta=0; int sx, sy; - int (*fTileFunc)(int sx,int addr,int pal); // parse the sprite data sy=sprite[0]; @@ -656,9 +857,9 @@ static void DrawSprite(int *sprite, int sh) sx=code>>16; // X width=sy>>28; height=(sy>>24)&7; // Width and height in tiles - sy=(sy<<16)>>16; // Y + sy=(s16)sy; // Y - row=DrawScanline-sy; // Row of the sprite we are on + row=Pico.est.DrawScanline-sy; // Row of the sprite we are on if (code&0x1000) row=(height<<3)-1-row; // Flip Y @@ -670,29 +871,36 @@ static void DrawSprite(int *sprite, int sh) delta<<=4; // Delta of address pal=(code>>9)&0x30; - pal|=sh<<6; + pal|=sh<<7; // shadow if (sh && (code&0x6000) == 0x6000) { if(code&0x0800) fTileFunc=TileFlipSH_markop; else fTileFunc=TileNormSH_markop; + } else if (sh) { + if(code&0x0800) fTileFunc=TileFlipNonSH; + else fTileFunc=TileNormNonSH; } else { if(code&0x0800) fTileFunc=TileFlip; else fTileFunc=TileNorm; } + if (w) width = w; // tile limit for (; width; width--,sx+=8,tile+=delta) { + unsigned int pack; + if(sx<=0) continue; if(sx>=328) break; // Offscreen - tile&=0x7fff; // Clip tile address - fTileFunc(sx,tile,pal); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + (tile & 0x7fff))); + fTileFunc(pd + sx, pack, pal); } } #endif -static void DrawSpriteInterlace(unsigned int *sprite) +static void DrawSpriteInterlace(u32 *sprite) { + unsigned char *pd = Pico.est.HighCol; int width=0,height=0; int row=0,code=0; int pal; @@ -700,15 +908,15 @@ static void DrawSpriteInterlace(unsigned int *sprite) int sx, sy; // parse the sprite data - sy=sprite[0]; + 
sy=CPU_LE2(sprite[0]); height=sy>>24; sy=(sy&0x3ff)-0x100; // Y width=(height>>2)&3; height&=3; width++; height++; // Width and height in tiles - row=(DrawScanline<<1)-sy; // Row of the sprite we are on + row=(Pico.est.DrawScanline<<1)-sy; // Row of the sprite we are on - code=sprite[1]; + code=CPU_LE2(sprite[1]); sx=((code>>16)&0x1ff)-0x78; // X if (code&0x1000) row^=(16<=328) break; // Offscreen - tile&=0x7fff; // Clip tile address - if (code&0x0800) TileFlip(sx,tile,pal); - else TileNorm(sx,tile,pal); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + (tile & 0x7fff))); + if (code & 0x0800) TileFlip(pd + sx, pack, pal); + else TileNorm(pd + sx, pack, pal); } } -static void DrawAllSpritesInterlace(int pri, int sh) +static NOINLINE void DrawAllSpritesInterlace(int pri, int sh) { struct PicoVideo *pvid=&Pico.video; - int i,u,table,link=0,sline=DrawScanline<<1; - unsigned int *sprites[80]; // Sprite index + int i,u,table,link=0,sline=Pico.est.DrawScanline<<1; + u32 *sprites[80]; // Sprite index + int max_sprites = pvid->reg[12]&1 ? 
80 : 64; table=pvid->reg[5]&0x7f; if (pvid->reg[12]&1) table&=0x7e; // Lowest bit 0 in 40-cell mode table<<=8; // Get sprite table address/2 - for (i=u=0; u < 80 && i < 21; u++) + for (i = u = 0; u < max_sprites && link < max_sprites; u++) { - unsigned int *sprite; + u32 *sprite; int code, sx, sy, height; - sprite=(unsigned int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(u32 *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite // get sprite info - code = sprite[0]; - sx = sprite[1]; + code = CPU_LE2(sprite[0]); + sx = CPU_LE2(sprite[1]); if(((sx>>15)&1) != pri) goto nextsprite; // wrong priority sprite // check if it is on this line sy = (code&0x3ff)-0x100; height = (((code>>24)&3)+1)<<4; - if(sline < sy || sline >= sy+height) goto nextsprite; // no + if((sline < sy) | (sline >= sy+height)) goto nextsprite; // no // check if sprite is not hidden offscreen sx = (sx>>16)&0x1ff; sx -= 0x78; // Get X coordinate + 8 - if(sx <= -8*3 || sx >= 328) goto nextsprite; + if((sx <= -8*3) | (sx >= 328)) goto nextsprite; // sprite is good, save it's pointer sprites[i++]=sprite; @@ -790,52 +1001,48 @@ static void DrawAllSpritesInterlace(int pri, int sh) * Index + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: vert./horiz. size * Index + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 */ -static void DrawSpritesSHi(unsigned char *sprited) +static void DrawSpritesSHi(unsigned char *sprited, const struct PicoEState *est) { - int (*fTileFunc)(int sx,int addr,int pal); + static void (*tilefuncs[2][2][2])(unsigned char *, unsigned, unsigned char) = { + { {NULL, NULL}, {TileNorm, TileFlip} }, + { {TileNormSH_onlyop_lp, TileFlipSH_onlyop_lp}, {TileNormSH, TileFlipSH} } + }; // [sh?][hi?][flip?] 
+ void (*fTileFunc)(unsigned char *pd, unsigned int pack, unsigned char pal); + unsigned char *pd = Pico.est.HighCol; unsigned char *p; - int cnt; + int cnt, w; cnt = sprited[0] & 0x7f; if (cnt == 0) return; - p = &sprited[3]; + p = &sprited[4]; + if ((sprited[1] & (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) == (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) + return; // masking effective due to tile overflow // Go through sprites backwards: - for (cnt--; cnt >= 0; cnt--) + w = p[cnt]; // possibly clipped width of last sprite + for (cnt--; cnt >= 0; cnt--, w = 0) { - int *sprite, code, pal, tile, sx, sy; + s32 *sprite, code; + int pal, tile, sx, sy; int offs, delta, width, height, row; offs = (p[cnt] & 0x7f) * 2; - sprite = HighPreSpr + offs; + sprite = est->HighPreSpr + offs; code = sprite[1]; pal = (code>>9)&0x30; - if (pal == 0x30) - { - if (code & 0x8000) // hi priority - { - if (code&0x800) fTileFunc=TileFlipSH; - else fTileFunc=TileNormSH; - } else { - if (code&0x800) fTileFunc=TileFlipSH_onlyop_lp; - else fTileFunc=TileNormSH_onlyop_lp; - } - } else { - if (!(code & 0x8000)) continue; // non-operator low sprite, already drawn - if (code&0x800) fTileFunc=TileFlip; - else fTileFunc=TileNorm; - } + fTileFunc = tilefuncs[pal == 0x30][!!(code & 0x8000)][!!(code & 0x800)]; + if (fTileFunc == NULL) continue; // non-operator low sprite, already drawn // parse remaining sprite data sy=sprite[0]; sx=code>>16; // X width=sy>>28; height=(sy>>24)&7; // Width and height in tiles - sy=(sy<<16)>>16; // Y + sy=(s16)sy; // Y - row=DrawScanline-sy; // Row of the sprite we are on + row=est->DrawScanline-sy; // Row of the sprite we are on if (code&0x1000) row=(height<<3)-1-row; // Flip Y @@ -846,13 +1053,16 @@ static void DrawSpritesSHi(unsigned char *sprited) tile &= 0x7ff; tile<<=4; tile+=(row&7)<<1; // Tile address delta<<=4; // Delta of address + if (w) width = w; // tile limit for (; width; width--,sx+=8,tile+=delta) { + unsigned int pack; + if(sx<=0) continue; if(sx>=328) break; // Offscreen - 
tile&=0x7fff; // Clip tile address - fTileFunc(sx,tile,pal); + pack = CPU_LE2(*(u32 *)(PicoMem.vram + (tile & 0x7fff))); + fTileFunc(pd + sx, pack, pal); } } } @@ -860,53 +1070,47 @@ static void DrawSpritesSHi(unsigned char *sprited) static void DrawSpritesHiAS(unsigned char *sprited, int sh) { - int (*fTileFunc)(int sx,int addr,int pal); - unsigned char *p; - int entry, cnt, sh_cnt = 0; + static unsigned (*tilefuncs[2][2][2])(unsigned, unsigned char *, unsigned, unsigned char) = { + { {TileNormAS_onlymark, TileFlipAS_onlymark}, {TileNormAS, TileFlipAS} }, + { {TileNormSH_AS_onlyop_lp, TileFlipSH_AS_onlyop_lp}, {TileNormSH_AS, TileFlipSH_AS} } + }; // [sh?][hi?][flip?] + unsigned (*fTileFunc)(unsigned m, unsigned char *pd, unsigned int pack, unsigned char pal); + unsigned char *pd = Pico.est.HighCol; + unsigned char mb[sizeof(DefHighCol)/8]; + unsigned char *p, *mp; + unsigned m; + int entry, cnt; cnt = sprited[0] & 0x7f; if (cnt == 0) return; - rendstatus |= PDRAW_SPR_LO_ON_HI; - - p = &sprited[3]; + memset(mb, 0xff, sizeof(mb)); + p = &sprited[4]; + if ((sprited[1] & (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) == (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) + return; // masking effective due to tile overflow // Go through sprites: for (entry = 0; entry < cnt; entry++) { - int *sprite, code, pal, tile, sx, sy; + s32 *sprite, code; + int pal, tile, sx, sy; int offs, delta, width, height, row; offs = (p[entry] & 0x7f) * 2; - sprite = HighPreSpr + offs; + sprite = Pico.est.HighPreSpr + offs; code = sprite[1]; pal = (code>>9)&0x30; - if (code & 0x8000) // hi priority - { - if (sh && pal == 0x30) - { - if (code&0x800) fTileFunc=TileFlipAS_noop; - else fTileFunc=TileNormAS_noop; - } else { - if (code&0x800) fTileFunc=TileFlipAS; - else fTileFunc=TileNormAS; - } - } else { - if (code&0x800) fTileFunc=TileFlipAS_onlymark; - else fTileFunc=TileNormAS_onlymark; - } - if (sh && pal == 0x30) - p[sh_cnt++] = offs / 2; // re-save for sh/hi pass + fTileFunc = tilefuncs[(sh && pal == 
0x30)][!!(code&0x8000)][!!(code&0x800)]; // parse remaining sprite data sy=sprite[0]; sx=code>>16; // X width=sy>>28; height=(sy>>24)&7; // Width and height in tiles - sy=(sy<<16)>>16; // Y + sy=(s16)sy; // Y - row=DrawScanline-sy; // Row of the sprite we are on + row=Pico.est.DrawScanline-sy; // Row of the sprite we are on if (code&0x1000) row=(height<<3)-1-row; // Flip Y @@ -917,268 +1121,588 @@ static void DrawSpritesHiAS(unsigned char *sprited, int sh) tile &= 0x7ff; tile<<=4; tile+=(row&7)<<1; // Tile address delta<<=4; // Delta of address - pal |= 0x80; - for (; width; width--,sx+=8,tile+=delta) + if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? + while (sx <= 0 && width) width--, sx+=8, tile+=delta; // Offscreen + mp = mb+(sx>>3); + for (m = *mp; width; width--, sx+=8, tile+=delta, *mp++ = m, m >>= 8) { - if(sx<=0) continue; + unsigned int pack; + if(sx>=328) break; // Offscreen - tile&=0x7fff; // Clip tile address - fTileFunc(sx,tile,pal); - } + pack = CPU_LE2(*(u32 *)(PicoMem.vram + (tile & 0x7fff))); + + m |= mp[1] << 8; // next mask byte + // shift mask bits to bits 8-15 for easier load/store handling + m = fTileFunc(m << (8-(sx&0x7)), pd + sx, pack, pal) >> (8-(sx&0x7)); + } + *mp = m; // write last mask byte } - - if (!sh || !(sprited[1]&SPRL_MAY_HAVE_OP)) return; - - /* nasty 1: remove 'sprite' flags */ - { - int c = 320/4/4, *zb = (int *)(HighCol+8); - while (c--) - { - *zb++ &= 0x7f7f7f7f; *zb++ &= 0x7f7f7f7f; - *zb++ &= 0x7f7f7f7f; *zb++ &= 0x7f7f7f7f; - } - } - - /* nasty 2: sh operator pass */ - sprited[0] = sh_cnt; - DrawSpritesSHi(sprited); } +#ifdef FORCE +// NB lots of duplicate code, all for the sake of a small performance gain. 
+static void DrawStripForced(struct TileStrip *ts, int cellskip) +{ + unsigned char *pd = Pico.est.HighCol; + int tilex, dx, ty, addr=0, cells; + u32 code = 0, oldcode = -1; + int pal = 0; + + // Draw tiles across screen: + tilex=((-ts->hscroll)>>3)+cellskip; + ty=(ts->line&7)<<1; // Y-Offset into tile + dx=((ts->hscroll-1)&7)+1; + cells = ts->cells - cellskip; + if(dx != 8) cells++; // have hscroll, need to draw 1 cell more + dx+=cellskip<<3; + + for (; cells > 0; dx+=8, tilex++, cells--) + { + u32 pack; + + code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; + + if (code!=oldcode) { + oldcode = code; + // Get tile address/2: + addr = ((code&0x7ff)<<4) + ty; + if (code & 0x1000) addr^=0xe; // Y-flip + + pal = (code>>9)&0x30; + } + + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); + + if (code & 0x0800) TileFlip_and(pd + dx, pack, pal); + else TileNorm_and(pd + dx, pack, pal); + } +} + +static void DrawStripVSRamForced(struct TileStrip *ts, int plane_sh, int cellskip) +{ + unsigned char *pd = Pico.est.HighCol; + int tilex, dx, ty=0, addr=0, cell=0, nametabadd=0; + u32 code=0, oldcode=-1; + int pal=0, scan=Pico.est.DrawScanline, plane; + + // Draw tiles across screen: + plane = plane_sh & LF_PLANE; + tilex=(-ts->hscroll)>>3; + dx=((ts->hscroll-1)&7)+1; + if (ts->hscroll & 0x0f) { + int adj = ((ts->hscroll ^ dx) >> 3) & 1; + cell -= adj + 1; + ts->cells -= adj; + PicoMem.vsram[0x3e] = PicoMem.vsram[0x3f] = plane_sh >> 16; + } + cell+=cellskip; + tilex+=cellskip; + dx+=cellskip<<3; + + if ((cell&1)==1) + { + int line,vscroll; + vscroll = PicoMem.vsram[plane + (cell&0x3e)]; + + // Find the line in the name table + line=(vscroll+scan)&ts->line&0xffff; // ts->line is really ymask .. + nametabadd=(line>>3)<<(ts->line>>24); // .. 
and shift[width] + ty=(line&7)<<1; // Y-Offset into tile + } + for (; cell < ts->cells; dx+=8,tilex++,cell++) + { + unsigned int pack; + + if ((cell&1)==0) + { + int line,vscroll; + vscroll = PicoMem.vsram[plane + (cell&0x3e)]; + + // Find the line in the name table + line=(vscroll+scan)&ts->line&0xffff; // ts->line is really ymask .. + nametabadd=(line>>3)<<(ts->line>>24); // .. and shift[width] + ty=(line&7)<<1; // Y-Offset into tile + } + + code=PicoMem.vram[ts->nametab+nametabadd+(tilex&ts->xmask)]; + + if (code!=oldcode) { + oldcode = code; + // Get tile address/2: + addr=(code&0x7ff)<<4; + + pal = (code>>9)&0x30; // shadow + } + + pack = code & 0x1000 ? ty^0xe : ty; // Y-flip + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr+pack)); + + if (code & 0x0800) TileFlip_and(pd + dx, pack, pal); + else TileNorm_and(pd + dx, pack, pal); + } +} + +void DrawStripInterlaceForced(struct TileStrip *ts) +{ + unsigned char *pd = Pico.est.HighCol; + int tilex = 0, dx = 0, ty = 0, cells; + int oldcode = -1; + unsigned int pal = 0, pack = 0; + + // Draw tiles across screen: + tilex=(-ts->hscroll)>>3; + ty=(ts->line&15)<<1; // Y-Offset into tile + dx=((ts->hscroll-1)&7)+1; + cells = ts->cells; + if(dx != 8) cells++; // have hscroll, need to draw 1 cell more + + for (; cells; dx+=8,tilex++,cells--) + { + u32 code = PicoMem.vram[ts->nametab + (tilex & ts->xmask)]; + + if (code!=oldcode) { + oldcode = code; + + // Get tile address/2: + u32 addr = ((code&0x3ff)<<5) + ty; + if (code & 0x1000) addr ^= 0x1e; // Y-flip + + pal = (code>>9)&0x30; // shadow + + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); + } + + if (code & 0x0800) TileFlip_and(pd + dx, pack, pal); + else TileNorm_and(pd + dx, pack, pal); + } +} + +// XXX only duplicated to avoid ARM asm hassles +static void DrawLayerForced(int plane_sh, int cellskip, int maxcells, + struct PicoEState *est) +{ + struct PicoVideo *pvid=&est->Pico->video; + const char shift[4]={5,6,5,7}; // 32,64 or 128 sized tilemaps (2 is invalid) + struct 
TileStrip ts; + int width, height, ymask; + int vscroll, htab; + + ts.cells=maxcells; + + // Work out the TileStrip to draw + + // Work out the name table size: 32 64 or 128 tiles (0-3) + width=pvid->reg[16]; + height=(width>>4)&3; width&=3; + + ts.xmask=(1<reg[4]&0x07)<<12; // B + else ts.nametab=(pvid->reg[2]&0x38)<< 9; // A + + htab=pvid->reg[13]<<9; // Horizontal scroll table address + switch (pvid->reg[11]&3) { + case 1: htab += (est->DrawScanline<<1) & 0x0f; break; + case 2: htab += (est->DrawScanline<<1) & ~0x0f; break; // Offset by tile + case 3: htab += (est->DrawScanline<<1); break; // Offset by line + } + htab+=plane_sh&1; // A or B + + // Get horizontal scroll value, will be masked later + ts.hscroll = PicoMem.vram[htab & 0x7fff]; + + if((pvid->reg[12]&6) == 6) { + // interlace mode 2 + vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value + + // Find the line in the name table + ts.line=(vscroll+(est->DrawScanline<<1))&((ymask<<1)|1); + ts.nametab+=(ts.line>>4)<reg[11]&4) { + // shit, we have 2-cell column based vscroll + // luckily this doesn't happen too often + ts.line=ymask|(shift[width]<<24); // save some stuff instead of line + // vscroll value for leftmost cells in case of hscroll not on 16px boundary + // XXX it's unclear what exactly the hw is doing. Continue reading where it + // stopped last seems to work best (H40: 0x50 (wrap->0x00), H32 0x40). 
+ plane_sh |= PicoMem.vsram[(pvid->reg[12]&1?0x00:0x20) + (plane_sh&1)] << 16; + DrawStripVSRamForced(&ts, plane_sh, cellskip); + } else { + vscroll = PicoMem.vsram[plane_sh & 1]; // Get vertical scroll value + + // Find the line in the name table + ts.line=(vscroll+est->DrawScanline)&ymask; + ts.nametab+=(ts.line>>3)<>9)&0x30; + + if (code&0x800) fTileFunc = TileFlipSH_AS_and; + else fTileFunc = TileNormSH_AS_and; + + // parse remaining sprite data + sy=sprite[0]; + sx=code>>16; // X + width=sy>>28; + height=(sy>>24)&7; // Width and height in tiles + sy=(s16)sy; // Y + + row=Pico.est.DrawScanline-sy; // Row of the sprite we are on + + if (code&0x1000) row=(height<<3)-1-row; // Flip Y + + tile=code + (row>>3); // Tile number increases going down + delta=height; // Delta to increase tile by going right + if (code&0x0800) { tile+=delta*(width-1); delta=-delta; } // Flip X + + tile &= 0x7ff; tile<<=4; tile+=(row&7)<<1; // Tile address + delta<<=4; // Delta of address + + if (entry+1 == cnt) width = p[entry+1]; // last sprite width limited? + while (sx <= 0 && width) width--, sx+=8, tile+=delta; // Offscreen + mp = mb+(sx>>3); + for (m = *mp; width; width--, sx+=8, tile+=delta, *mp++ = m, m >>= 8) + { + u32 pack; + + if(sx>=328) break; // Offscreen + + pack = CPU_LE2(*(u32 *)(PicoMem.vram + (tile & 0x7fff))); + + m |= mp[1] << 8; // next mask byte + // shift mask bits to bits 8-15 for easier load/store handling + m = fTileFunc(m << (8-(sx&0x7)), pd + sx, pack, pal) >> (8-(sx&0x7)); + } + *mp = m; // write last mask byte + } + + // anything not covered by a sprite is off + // XXX Titan hw notes say that transparent pixels remove shadow. Is this also + // the case in areas where no sprites are displayed? 
+ for (cnt = 1; cnt < sizeof(mb)-1; cnt++) + if (mb[cnt] == 0xff) { + *(u32 *)(pd+8*cnt+0) = 0; + *(u32 *)(pd+8*cnt+4) = 0; + } else if (mb[cnt]) + for (m = 0; m < 8; m++) + if (mb[cnt] & (1<Pico->video; int u,link=0,sh; int table=0; - int *pd = HighPreSpr; - int max_lines = 224, max_sprites = 80, max_width = 328; + s32 *pd = HighPreSpr + HighPreSprBank*2; + int max_sprites = 80, max_width = 328; int max_line_sprites = 20; // 20 sprites, 40 tiles - if (!(Pico.video.reg[12]&1)) + // SAT scanning is one line ahead, but don't overshoot. Technically, SAT + // parsing for line 0 is on the last line of the previous frame. + int first_line = est->DrawScanline + !!est->DrawScanline; + if (max_lines > rendlines-1) + max_lines = rendlines-1; + + // look-ahead SAT parsing for next line and sprite pixel fetching for current + // line are limited if display was disabled during HBLANK before current line + if (limit) limit = 16; // max sprites/pixels processed + + if (!(pvid->reg[12]&1)) max_sprites = 64, max_line_sprites = 16, max_width = 264; - if (PicoOpt & POPT_DIS_SPRITE_LIM) + if (*est->PicoOpt & POPT_DIS_SPRITE_LIM) max_line_sprites = MAX_LINE_SPRITES; - if (pvid->reg[1]&8) max_lines = 240; - sh = Pico.video.reg[0xC]&8; // shadow/hilight? + sh = pvid->reg[0xC]&8; // shadow/hilight? table=pvid->reg[5]&0x7f; if (pvid->reg[12]&1) table&=0x7e; // Lowest bit 0 in 40-cell mode table<<=8; // Get sprite table address/2 - if (!full) + for (u = first_line; u <= max_lines; u++) + *((int *)&HighLnSpr[u][0]) = 0; + + for (u = 0; u < max_sprites && link < max_sprites; u++) { - int pack; - // updates: tilecode, sx - for (u=0; u < max_sprites && (pack = *pd); u++, pd+=2) + u32 *sprite; + int code, code2, sx, sy, hv, height, width; + + sprite=(u32 *)(PicoMem.vram+((table+(link<<2))&0x7ffc)); // Find sprite + + // parse sprite info. 
the 1st half comes from the VDPs internal cache, + // the 2nd half is read from VRAM + code = CPU_LE2(VdpSATCache[2*link]); // normally same as sprite[0] + sy = (code&0x1ff)-0x80; + hv = (code>>24)&0xf; + height = (hv&3)+1; + width = (hv>>2)+1; + + code2 = CPU_LE2(sprite[1]); + sx = (code2>>16)&0x1ff; + sx -= 0x78; // Get X coordinate + 8 + + if (sy <= max_lines && sy + (height<<3) >= first_line) // sprite onscreen (y)? { - unsigned int *sprite; - int code2, sx, sy, height; + int entry, y, w, sx_min, onscr_x, maybe_op = 0; + // omit look-ahead line if sprite parsing limit reached + int last_line = (limit && u >= 2*limit ? max_lines-1 : max_lines); - sprite=(unsigned int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sx_min = 8-(width<<3); + onscr_x = sx_min < sx && sx < max_width; + if (sh && (code2 & 0x6000) == 0x6000) + maybe_op = SPRL_MAY_HAVE_OP; - // parse sprite info - code2 = sprite[1]; - sx = (code2>>16)&0x1ff; - sx -= 0x78; // Get X coordinate + 8 - sy = (pack << 16) >> 16; - height = (pack >> 24) & 0xf; - - if (sy < max_lines && sy + (height<<3) > DrawScanline && // sprite onscreen (y)? - (sx > -24 || sx < max_width)) // onscreen x + entry = (((pd - HighPreSpr) / 2) & 0x7f) | ((code2>>8)&0x80); + y = (sy >= first_line) ? sy : first_line; + for (; y < sy + (height<<3) && y <= last_line; y++) { - int y = (sy >= DrawScanline) ? sy : DrawScanline; - int entry = ((pd - HighPreSpr) / 2) | ((code2>>8)&0x80); - for (; y < sy + (height<<3) && y < max_lines; y++) - { - int i, cnt; - cnt = HighLnSpr[y][0] & 0x7f; - if (cnt >= max_line_sprites) continue; // sprite limit? + unsigned char *p = &HighLnSpr[y][0]; + int cnt = p[0] & 0x7f; + if (p[1] & SPRL_MASKED) continue; // masked? - for (i = 0; i < cnt; i++) - if (((HighLnSpr[y][3+i] ^ entry) & 0x7f) == 0) goto found; + if (p[3] >= max_line_sprites) continue; // sprite limit? 
+ p[3] ++; - // this sprite was previously missing - HighLnSpr[y][3+cnt] = entry; - HighLnSpr[y][0] = cnt + 1; -found:; - if (entry & 0x80) - HighLnSpr[y][1] |= SPRL_HAVE_HI; - else HighLnSpr[y][1] |= SPRL_HAVE_LO; + w = width; + if (p[2] + width > max_line_sprites*2) { // tile limit? + if (y+1 < 240) HighLnSpr[y+1][1] |= SPRL_TILE_OVFL; + if (p[2] >= max_line_sprites*2) continue; + w = max_line_sprites*2 - p[2]; } + p[2] += w; + + if (sx == -0x78) { + if (p[1] & (SPRL_HAVE_X|SPRL_TILE_OVFL)) + p[1] |= SPRL_MASKED; // masked, no more sprites for this line + if (!(p[1] & SPRL_HAVE_X) && cnt == 0) + p[1] |= SPRL_HAVE_MASK0; // 1st sprite is masking + } else + p[1] |= SPRL_HAVE_X; + + if (!onscr_x) continue; // offscreen x + + // sprite is (partly) visible, store info for renderer + p[1] |= (entry & 0x80) ? SPRL_HAVE_HI : SPRL_HAVE_LO; + p[1] |= maybe_op; // there might be op sprites on this line + if (cnt > 0 && (code2 & 0x8000) && !(p[4+cnt-1]&0x80)) + p[1] |= SPRL_LO_ABOVE_HI; + + p[4+cnt] = entry; + p[5+cnt] = w; // width clipped by tile limit for sprite renderer + p[0] = (cnt + 1) | HighPreSprBank; } + } - code2 &= ~0xfe000000; - code2 -= 0x00780000; // Get X coordinate + 8 in upper 16 bits - pd[1] = code2; + *pd++ = (width<<28)|(height<<24)|(link<<16)|((u16)sy); + *pd++ = (sx<<16)|((u16)code2); - // Find next sprite - link=(sprite[0]>>16)&0x7f; - if (!link) break; // End of sprites + // Find next sprite + link=(code>>16)&0x7f; + if (!link) break; // End of sprites + } + *pd = 0; + + // fetching sprite pixels isn't done while display is disabled during HBLANK + if (limit) { + int w = 0; + unsigned char *sprited = &HighLnSpr[max_lines-1][0]; // current render line + + for (u = 0; u < (sprited[0] & 0x7f); u++) { + s32 *sp = HighPreSpr + (sprited[4+u] & 0x7f) * 2 + HighPreSprBank*2; + int sw = sp[0] >> 28; + if (w + sw > limit) { + sprited[0] = u | HighPreSprBank; + sprited[4+u] = limit-w; + break; + } + w += sw; } } - else - { - for (u = 0; u < max_lines; u++) - 
*((int *)&HighLnSpr[u][0]) = 0; - - for (u = 0; u < max_sprites; u++) - { - unsigned int *sprite; - int code, code2, sx, sy, hv, height, width; - - sprite=(unsigned int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite - - // parse sprite info - code = sprite[0]; - sy = (code&0x1ff)-0x80; - hv = (code>>24)&0xf; - height = (hv&3)+1; - - width = (hv>>2)+1; - code2 = sprite[1]; - sx = (code2>>16)&0x1ff; - sx -= 0x78; // Get X coordinate + 8 - - if (sy < max_lines && sy + (height<<3) > DrawScanline) // sprite onscreen (y)? - { - int entry, y, sx_min, onscr_x, maybe_op = 0; - - sx_min = 8-(width<<3); - onscr_x = sx_min < sx && sx < max_width; - if (sh && (code2 & 0x6000) == 0x6000) - maybe_op = SPRL_MAY_HAVE_OP; - - entry = ((pd - HighPreSpr) / 2) | ((code2>>8)&0x80); - y = (sy >= DrawScanline) ? sy : DrawScanline; - for (; y < sy + (height<<3) && y < max_lines; y++) - { - unsigned char *p = &HighLnSpr[y][0]; - int cnt = p[0]; - if (cnt >= max_line_sprites) continue; // sprite limit? - - if (p[2] >= max_line_sprites*2) { // tile limit? - p[0] |= 0x80; - continue; - } - p[2] += width; - - if (sx == -0x78) { - if (cnt > 0) - p[0] |= 0x80; // masked, no more sprites for this line - continue; - } - // must keep the first sprite even if it's offscreen, for masking - if (cnt > 0 && !onscr_x) continue; // offscreen x - - p[3+cnt] = entry; - p[0] = cnt + 1; - p[1] |= (entry & 0x80) ? 
SPRL_HAVE_HI : SPRL_HAVE_LO; - p[1] |= maybe_op; // there might be op sprites on this line - if (cnt > 0 && (code2 & 0x8000) && !(p[3+cnt-1]&0x80)) - p[1] |= SPRL_LO_ABOVE_HI; - } - } - - *pd++ = (width<<28)|(height<<24)|(hv<<16)|((unsigned short)sy); - *pd++ = (sx<<16)|((unsigned short)code2); - - // Find next sprite - link=(code>>16)&0x7f; - if (!link) break; // End of sprites - } - *pd = 0; #if 0 - for (u = 0; u < max_lines; u++) - { - int y; - printf("c%03i: %2i, %2i: ", u, HighLnSpr[u][0] & 0x7f, HighLnSpr[u][2]); - for (y = 0; y < HighLnSpr[u][0] & 0x7f; y++) - printf(" %i", HighLnSpr[u][y+3]); - printf("\n"); + for (u = first_line; u <= max_lines; u++) + { + int y; + printf("c%03i b%d: f %x c %2i/%2i w %2i: ", u, !!HighPreSprBank, HighLnSpr[u][1], + HighLnSpr[u][0] & 0x7f, HighLnSpr[u][3], HighLnSpr[u][2]); + for (y = 0; y < (HighLnSpr[u][0] & 0x7f); y++) { + s32 *sp = HighPreSpr + (HighLnSpr[u][y+4]&0x7f) * 2 + HighPreSprBank*2; + printf(" %i(%x/%x)", HighLnSpr[u][y+4],sp[0],sp[1]); } -#endif + printf("\n"); } +#endif + + HighPreSprBank ^= 0x80; } #ifndef _ASM_DRAW_C -static void DrawAllSprites(unsigned char *sprited, int prio, int sh) +static void DrawAllSprites(unsigned char *sprited, int prio, int sh, + struct PicoEState *est) { - int rs = rendstatus; unsigned char *p; - int cnt; - - if (rs & (PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES)) { - //elprintf(EL_STATUS, "PrepareSprites(%i)", (rs>>4)&1); - PrepareSprites(rs & PDRAW_DIRTY_SPRITES); - rendstatus = rs & ~(PDRAW_SPRITES_MOVED|PDRAW_DIRTY_SPRITES); - } + int cnt, w; cnt = sprited[0] & 0x7f; if (cnt == 0) return; - p = &sprited[3]; + p = &sprited[4]; + if ((sprited[1] & (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) == (SPRL_TILE_OVFL|SPRL_HAVE_MASK0)) + return; // masking effective due to tile overflow // Go through sprites backwards: - for (cnt--; cnt >= 0; cnt--) + w = p[cnt]; // possibly clipped width of last sprite + for (cnt--; cnt >= 0; cnt--, w = 0) { - int offs; + s32 *sp = est->HighPreSpr + (p[cnt]&0x7f) * 
2; if ((p[cnt] >> 7) != prio) continue; - offs = (p[cnt]&0x7f) * 2; - DrawSprite(HighPreSpr + offs, sh); + DrawSprite(sp, sh, w); } } // -------------------------------------------- -void BackFill(int reg7, int sh) +void BackFill(int bgc, int sh, struct PicoEState *est) { - unsigned int back; + u32 back = bgc; // Start with a blank scanline (background colour): - back=reg7&0x3f; - back|=sh<<6; + back|=sh<<7; // shadow back|=back<<8; back|=back<<16; - memset32((int *)(HighCol+8), back, 320/4); + memset32((int *)(est->HighCol+8), back, 320/4); } #endif // -------------------------------------------- -unsigned short HighPal[0x100]; +static u16 *BgcDMAbase; +static u32 BgcDMAsrc, BgcDMAmask; +static int BgcDMAlen, BgcDMAoffs; #ifndef _ASM_DRAW_C -void PicoDoHighPal555(int sh) +static +#endif +// handle DMA to background color +void BgcDMA(struct PicoEState *est) +{ + u16 *pd=est->DrawLineDest; + int len = (est->Pico->video.reg[12]&1) ? 320 : 256; + // TODO for now handles the line as all background. + int xl = (len == 320 ? 38 : 33); // DMA slots during HSYNC + int upscale = (est->rendstatus & PDRAW_SOFTSCALE) && len < 320; + u16 *q = upscale ? 
DefOutBuff : pd; + int i, l = len; + u16 t; + + if ((est->rendstatus & PDRAW_BORDER_32) && !upscale) + q += (320-len) / 2; + + BgcDMAlen -= ((l-BgcDMAoffs)>>1)+xl; + if (BgcDMAlen <= 0) { + // partial line + l += 2*BgcDMAlen; + est->rendstatus &= ~PDRAW_BGC_DMA; + } + + for (i = BgcDMAoffs; i < l; i += 2) { + // TODO use ps to overwrite only real bg pixels + t = BgcDMAbase[BgcDMAsrc++ & BgcDMAmask]; + q[i] = q[i+1] = PXCONV(t); + } + BgcDMAsrc += xl; // HSYNC DMA + BgcDMAoffs = 0; + + t = PXCONV(PicoMem.cram[Pico.video.reg[7] & 0x3f]); + while (i < len) q[i++] = t; // fill partial line with BG + + if (upscale) { + switch (PicoIn.filter) { + case 3: h_upscale_bl4_4_5(pd, 320, q, 256, len, f_nop); break; + case 2: h_upscale_bl2_4_5(pd, 320, q, 256, len, f_nop); break; + case 1: h_upscale_snn_4_5(pd, 320, q, 256, len, f_nop); break; + default: h_upscale_nn_4_5(pd, 320, q, 256, len, f_nop); break; + } + } +} + +// -------------------------------------------- + +static void PicoDoHighPal555_8bit(int sh, int line, struct PicoEState *est) { unsigned int *spal, *dpal; + unsigned int cnt = (sh ? 1 : est->SonicPalCount+1); unsigned int t, i; - Pico.m.dirtyPal = 0; + // reset dirty only if there are no outstanding changes + if (est->Pico->m.dirtyPal == 2) + est->Pico->m.dirtyPal = 0; - spal = (void *)Pico.cram; - dpal = (void *)HighPal; + // In Sonic render mode palettes were backuped in SonicPal + spal = (void *)est->SonicPal; + dpal = (void *)est->HighPal; - for (i = 0; i < 0x40 / 2; i++) { + // additional palettes stored after in-frame changes + for (i = 0; i < cnt * 0x40 / 2; i++) { t = spal[i]; -#ifdef USE_BGR555 - t = ((t & 0x000e000e)<< 1) | ((t & 0x00e000e0)<<3) | ((t & 0x0e000e00)<<4); -#else - t = ((t & 0x000e000e)<<12) | ((t & 0x00e000e0)<<3) | ((t & 0x0e000e00)>>7); -#endif // treat it like it was 4-bit per channel, since in s/h mode it somewhat is that. 
// otherwise intensity difference between this and s/h will be wrong - t |= (t >> 4) & 0x08610861; // 0x18e318e3 + t = PXCONV(t); + t |= (t >> 4) & PXMASKL; dpal[i] = t; } @@ -1186,86 +1710,105 @@ void PicoDoHighPal555(int sh) if (sh) { // shadowed pixels - for (i = 0; i < 0x40 / 2; i++) - dpal[0x40/2 | i] = dpal[0xc0/2 | i] = (dpal[i] >> 1) & 0x738e738e; + for (i = 0; i < 0x40 / 2; i++) { + dpal[0xc0/2 + i] = dpal[i]; + dpal[0x80/2 + i] = (dpal[i] >> 1) & PXMASKH; + } // hilighted pixels for (i = 0; i < 0x40 / 2; i++) { - t = ((dpal[i] >> 1) & 0x738e738e) + 0x738e738e; // 0x7bef7bef; - t |= (t >> 4) & 0x08610861; - dpal[0x80/2 | i] = t; + t = ((dpal[i] >> 1) & PXMASKH) + PXMASKH; + t |= (t >> 4) & PXMASKL; + dpal[0x40/2 + i] = t; } } } -#if 0 -static void FinalizeLineBGR444(int sh, int line) +#ifndef _ASM_DRAW_C +void PicoDoHighPal555(int sh, int line, struct PicoEState *est) { - unsigned short *pd=DrawLineDest; - unsigned char *ps=HighCol+8; - unsigned short *pal=Pico.cram; - int len, i, t, mask=0xff; + unsigned int *spal, *dpal; + unsigned int t, i; - if (Pico.video.reg[12]&1) { - len = 320; - } else { - if(!(PicoOpt&POPT_DIS_32C_BORDER)) pd+=32; - len = 256; + est->Pico->m.dirtyPal = 0; + + spal = (void *)PicoMem.cram; + dpal = (void *)est->HighPal; + + for (i = 0; i < 0x40 / 2; i++) { + t = spal[i]; + // treat it like it was 4-bit per channel, since in s/h mode it somewhat is that. 
+ // otherwise intensity difference between this and s/h will be wrong + t = PXCONV(t); + t |= (t >> 4) & PXMASKL; + dpal[i] = dpal[0xc0/2 + i] = t; } - if(sh) { - pal=HighPal; - if(Pico.m.dirtyPal) { - blockcpy(pal, Pico.cram, 0x40*2); - // shadowed pixels - for(i = 0x3f; i >= 0; i--) - pal[0x40|i] = pal[0xc0|i] = (unsigned short)((pal[i]>>1)&0x0777); - // hilighted pixels - for(i = 0x3f; i >= 0; i--) { - t=pal[i]&0xeee;t+=0x444;if(t&0x10)t|=0xe;if(t&0x100)t|=0xe0;if(t&0x1000)t|=0xe00;t&=0xeee; - pal[0x80|i]=(unsigned short)t; - } - Pico.m.dirtyPal = 0; + // norm: xxx0, sh: 0xxx, hi: 0xxx + 7 + if (sh) + { + // shadowed pixels + for (i = 0; i < 0x40 / 2; i++) + dpal[0x80/2 + i] = (dpal[i] >> 1) & PXMASKH; + // hilighted pixels + for (i = 0; i < 0x40 / 2; i++) { + t = ((dpal[i] >> 1) & PXMASKH) + PXMASKH; + t |= (t >> 4) & PXMASKL; + dpal[0x40/2 + i] = t; } } - - if (!sh && (rendstatus & PDRAW_SPR_LO_ON_HI)) - mask=0x3f; // accurate sprites - - for(i = 0; i < len; i++) - pd[i] = pal[ps[i] & mask]; } -#endif - -void FinalizeLine555(int sh, int line) +void FinalizeLine555(int sh, int line, struct PicoEState *est) { - unsigned short *pd=DrawLineDest; - unsigned char *ps=HighCol+8; - unsigned short *pal=HighPal; + unsigned short *pd=est->DrawLineDest; + unsigned char *ps=est->HighCol+8; + unsigned short *pal=est->HighPal; int len; - if (Pico.m.dirtyPal) - PicoDoHighPal555(sh); + if (DrawLineDestIncrement == 0) + return; - if (Pico.video.reg[12]&1) { + if (est->rendstatus & PDRAW_BGC_DMA) + return BgcDMA(est); + + PicoDrawUpdateHighPal(); + + len = 256; + if (!(PicoIn.AHW & PAHW_8BIT) && (est->Pico->video.reg[12]&1)) len = 320; + else if ((PicoIn.AHW & PAHW_GG) && (est->Pico->m.hardware & PMS_HW_LCD)) + len = 160; + else if ((PicoIn.AHW & PAHW_SMS) && (est->Pico->video.reg[0] & 0x20)) + len -= 8, ps += 8; + + if ((est->rendstatus & PDRAW_SOFTSCALE) && len < 320) { + if (len >= 240 && len <= 256) { + pd += (256-len)>>1; + switch (PicoIn.filter) { + case 3: 
h_upscale_bl4_4_5(pd, 320, ps, 256, len, f_pal); break; + case 2: h_upscale_bl2_4_5(pd, 320, ps, 256, len, f_pal); break; + case 1: h_upscale_snn_4_5(pd, 320, ps, 256, len, f_pal); break; + default: h_upscale_nn_4_5(pd, 320, ps, 256, len, f_pal); break; + } + if (est->rendstatus & PDRAW_32X_SCALE) { // 32X needs scaled CLUT data + unsigned char *psc = ps - 256, *pdc = psc; + rh_upscale_nn_4_5(pdc, 320, psc, 256, 256, f_nop); + } + } else if (len == 160) + switch (PicoIn.filter) { + case 3: + case 2: h_upscale_bl2_1_2(pd, 320, ps, 160, len, f_pal); break; + default: h_upscale_nn_1_2(pd, 320, ps, 160, len, f_pal); break; + } } else { - if (!(PicoOpt&POPT_DIS_32C_BORDER)) pd+=32; - len = 256; - } - - { -#ifndef PSP - int i, mask=0xff; - if (!sh && (rendstatus & PDRAW_SPR_LO_ON_HI)) - mask=0x3f; // accurate sprites, upper bits are priority stuff - - for (i = 0; i < len; i++) - pd[i] = pal[ps[i] & mask]; + if ((est->rendstatus & PDRAW_BORDER_32) && len < 320) + pd += (320-len) / 2; +#if 1 + h_copy(pd, 320, ps, 320, len, f_pal); #else extern void amips_clut(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); extern void amips_clut_6bit(unsigned short *dst, unsigned char *src, unsigned short *pal, int count); - if (!sh && (rendstatus & PDRAW_SPR_LO_ON_HI)) + if (!sh) amips_clut_6bit(pd, ps, pal, len); else amips_clut(pd, ps, pal, len); #endif @@ -1273,58 +1816,74 @@ void FinalizeLine555(int sh, int line) } #endif -static void FinalizeLine8bit(int sh, int line) +void FinalizeLine8bit(int sh, int line, struct PicoEState *est) { - unsigned char *pd = DrawLineDest; - int len, rs = rendstatus; - static int dirty_count; + unsigned char *pd = est->DrawLineDest; + unsigned char *ps = est->HighCol+8; + int len; + static int dirty_line; - if (!sh && Pico.m.dirtyPal == 1) + // a hack for mid-frame palette changes + if (est->Pico->m.dirtyPal == 1) { - // a hack for mid-frame palette changes - if (!(rs & PDRAW_SONIC_MODE)) - dirty_count = 1; - else dirty_count++; 
- rs |= PDRAW_SONIC_MODE; - rendstatus = rs; - if (dirty_count == 3) { - blockcpy(HighPal, Pico.cram, 0x40*2); - } else if (dirty_count == 11) { - blockcpy(HighPal+0x40, Pico.cram, 0x40*2); + // store a maximum of 3 additional palettes in SonicPal + if (est->SonicPalCount < 3 && + (!(est->rendstatus & PDRAW_SONIC_MODE) || (line - dirty_line >= 4))) { + est->SonicPalCount ++; + dirty_line = line; + est->rendstatus |= PDRAW_SONIC_MODE; } + blockcpy(est->SonicPal+est->SonicPalCount*0x40, PicoMem.cram, 0x40*2); + est->Pico->m.dirtyPal = 2; } - if (Pico.video.reg[12]&1) { + len = 256; + if (!(PicoIn.AHW & PAHW_8BIT) && (est->Pico->video.reg[12]&1)) len = 320; - } else { - if (!(PicoOpt & POPT_DIS_32C_BORDER)) - pd += 32; - len = 256; - } + else if ((PicoIn.AHW & PAHW_GG) && (est->Pico->m.hardware & PMS_HW_LCD)) + len = 160; + else if ((PicoIn.AHW & PAHW_SMS) && (est->Pico->video.reg[0] & 0x20)) + len -= 8, ps += 8; - if (!sh && (rs & PDRAW_SONIC_MODE)) { - if (dirty_count >= 11) { - blockcpy_or(pd, HighCol+8, len, 0x80); - } else { - blockcpy_or(pd, HighCol+8, len, 0x40); - } + if (DrawLineDestIncrement == 0) + pd = est->HighCol+8; + + if ((est->rendstatus & PDRAW_SOFTSCALE) && len < 320) { + unsigned char pal = 0; + + if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) + pal = est->SonicPalCount*0x40; + // Smoothing can't be used with CLUT, hence it's always Nearest Neighbour. + if (len >= 240) + // use reverse version since src and dest ptr may be the same. 
+ rh_upscale_nn_4_5(pd, 320, ps, 256, len, f_or); + else + rh_upscale_nn_1_2(pd, 320, ps, 256, len, f_or); } else { - blockcpy(pd, HighCol+8, len); + if ((est->rendstatus & PDRAW_BORDER_32) && len < 320) + pd += (320-len) / 2; + if (!sh && (est->rendstatus & PDRAW_SONIC_MODE)) + // select active backup palette + blockcpy_or(pd, ps, len, est->SonicPalCount*0x40); + else if (pd != ps) + blockcpy(pd, ps, len); } } -static void (*FinalizeLine)(int sh, int line); +static void (*FinalizeLine)(int sh, int line, struct PicoEState *est); // -------------------------------------------- static int DrawDisplay(int sh) { - unsigned char *sprited = &HighLnSpr[DrawScanline][0]; - struct PicoVideo *pvid=&Pico.video; - int win=0,edge=0,hvwind=0; - int maxw,maxcells; + struct PicoEState *est=&Pico.est; + unsigned char *sprited = &HighLnSpr[est->DrawScanline][0]; + struct PicoVideo *pvid=&est->Pico->video; + int win=0, edge=0, hvwind=0, lflags; + int maxw, maxcells; - rendstatus &= ~(PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO); + est->rendstatus &= ~(PDRAW_SHHI_DONE|PDRAW_PLANE_HI_PRIO|PDRAW_WND_DIFF_PRIO); + est->HighPreSpr = HighPreSpr + (sprited[0]&0x80)*2; if (pvid->reg[12]&1) { maxw = 328; maxcells = 40; @@ -1336,8 +1895,8 @@ static int DrawDisplay(int sh) win=pvid->reg[0x12]; edge=(win&0x1f)<<3; - if (win&0x80) { if (DrawScanline>=edge) hvwind=1; } - else { if (DrawScanline< edge) hvwind=1; } + if (win&0x80) { if (est->DrawScanline>=edge) hvwind=1; } + else { if (est->DrawScanline< edge) hvwind=1; } if (!hvwind) // we might have a vertical window here { @@ -1354,54 +1913,76 @@ static int DrawDisplay(int sh) } /* - layer B low - */ - if (PicoDrawMask & PDRAW_LAYERB_ON) - DrawLayer(1|(sh<<1), HighCacheB, 0, maxcells); + if (!(pvid->debug_p & PVD_KILL_B)) { + lflags = LF_PLANE_B | (sh<<1); + DrawLayer(lflags, HighCacheB, 0, maxcells, est); + } /* - layer A low - */ - if (!(PicoDrawMask & PDRAW_LAYERA_ON)); + lflags = LF_PLANE_A | (sh<<1); + if (pvid->debug_p & PVD_KILL_A) + ; else if 
(hvwind == 1) - DrawWindow(0, maxcells>>1, 0, sh); + DrawWindow(0, maxcells>>1, 0, sh, est); else if (hvwind == 2) { - DrawLayer(0|(sh<<1), HighCacheA, (win&0x80) ? 0 : edge<<1, (win&0x80) ? edge<<1 : maxcells); - DrawWindow( (win&0x80) ? edge : 0, (win&0x80) ? maxcells>>1 : edge, 0, sh); - } else - DrawLayer(0|(sh<<1), HighCacheA, 0, maxcells); + DrawLayer(lflags, HighCacheA, (win&0x80) ? 0 : edge<<1, (win&0x80) ? edge<<1 : maxcells, est); + DrawWindow( (win&0x80) ? edge : 0, (win&0x80) ? maxcells>>1 : edge, 0, sh, est); + } + else + DrawLayer(lflags, HighCacheA, 0, maxcells, est); /* - sprites low - */ - if (!(PicoDrawMask & PDRAW_SPRITES_LOW_ON)); - else if (rendstatus & PDRAW_INTERLACE) + if (pvid->debug_p & PVD_KILL_S_LO) + ; + else if (est->rendstatus & PDRAW_INTERLACE) DrawAllSpritesInterlace(0, sh); else if (sprited[1] & SPRL_HAVE_LO) - DrawAllSprites(sprited, 0, sh); + DrawAllSprites(sprited, 0, sh, est); /* - layer B hi - */ - if ((PicoDrawMask & PDRAW_LAYERB_ON) && HighCacheB[0]) - DrawTilesFromCache(HighCacheB, sh, maxw); + if (!(pvid->debug_p & PVD_KILL_B) && HighCacheB[0]) + DrawTilesFromCache(HighCacheB, sh, maxw, est); /* - layer A hi - */ - if (!(PicoDrawMask & PDRAW_LAYERA_ON)); + if (pvid->debug_p & PVD_KILL_A) + ; else if (hvwind == 1) - DrawWindow(0, maxcells>>1, 1, sh); + DrawWindow(0, maxcells>>1, 1, sh, est); else if (hvwind == 2) { - if (HighCacheA[0]) DrawTilesFromCache(HighCacheA, sh, (win&0x80) ? edge<<4 : maxw); - DrawWindow((win&0x80) ? edge : 0, (win&0x80) ? maxcells>>1 : edge, 1, sh); + if (HighCacheA[0]) + DrawTilesFromCache(HighCacheA, sh, (win&0x80) ? edge<<4 : maxw, est); + DrawWindow((win&0x80) ? edge : 0, (win&0x80) ? 
maxcells>>1 : edge, 1, sh, est); } else - if (HighCacheA[0]) DrawTilesFromCache(HighCacheA, sh, maxw); + if (HighCacheA[0]) + DrawTilesFromCache(HighCacheA, sh, maxw, est); /* - sprites hi - */ - if (!(PicoDrawMask & PDRAW_SPRITES_HI_ON)); - else if (rendstatus & PDRAW_INTERLACE) + if (pvid->debug_p & PVD_KILL_S_HI) + ; + else if (est->rendstatus & PDRAW_INTERLACE) DrawAllSpritesInterlace(1, sh); // have sprites without layer pri bit ontop of sprites with that bit - else if ((sprited[1] & 0xd0) == 0xd0 && (PicoOpt & POPT_ACC_SPRITES)) + else if ((sprited[1] & SPRL_LO_ABOVE_HI) && (*est->PicoOpt & POPT_ACC_SPRITES)) DrawSpritesHiAS(sprited, sh); else if (sh && (sprited[1] & SPRL_MAY_HAVE_OP)) - DrawSpritesSHi(sprited); + DrawSpritesSHi(sprited, est); else if (sprited[1] & SPRL_HAVE_HI) - DrawAllSprites(sprited, 1, 0); + DrawAllSprites(sprited, 1, 0, est); + +#ifdef FORCE + if (pvid->debug_p & PVD_FORCE_B) { + lflags = LF_PLANE_B | (sh<<1); + DrawLayerForced(lflags, 0, maxcells, est); + } else if (pvid->debug_p & PVD_FORCE_A) { + lflags = LF_PLANE_A | (sh<<1); + DrawLayerForced(lflags, 0, maxcells, est); + } else if (pvid->debug_p & PVD_FORCE_S) + DrawSpritesForced(sprited); +#endif #if 0 { int *c, a, b; - for (a = 0, c = HighCacheA; *c; c++, a++); - for (b = 0, c = HighCacheB; *c; c++, b++); - printf("%i:%03i: a=%i, b=%i\n", Pico.m.frame_count, DrawScanline, a, b); + for (a = 0, c = HighCacheA; *c; c+=2, a++); + for (b = 0, c = HighCacheB; *c; c+=2, b++); + printf("%i:%03i: a=%i, b=%i\n", Pico.m.frame_count, + est->DrawScanline, a, b); } #endif @@ -1411,68 +1992,77 @@ static int DrawDisplay(int sh) // MUST be called every frame PICO_INTERNAL void PicoFrameStart(void) { - int offs = 8, lines = 224; + struct PicoEState *est = &Pico.est; + int loffs = 8, lines = 224, coffs = 0, columns = 320; + int sprep = est->rendstatus & PDRAW_DIRTY_SPRITES; + int skipped = est->rendstatus & PDRAW_SKIP_FRAME; + int sync = est->rendstatus & (PDRAW_SYNC_NEEDED | PDRAW_SYNC_NEXT); // 
prepare to do this frame - rendstatus = 0; - if ((Pico.video.reg[12] & 6) == 6) - rendstatus |= PDRAW_INTERLACE; // interlace mode - if (!(Pico.video.reg[12] & 1)) - rendstatus |= PDRAW_32_COLS; - if (Pico.video.reg[1] & 8) { - offs = 0; + est->rendstatus = 0; + + if (PicoIn.AHW & PAHW_32X) // H32 upscaling, before mixing in 32X layer + est->rendstatus = (*est->PicoOpt & POPT_ALT_RENDERER) ? + PDRAW_BORDER_32 : PDRAW_32X_SCALE|PDRAW_SOFTSCALE; + else if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) + est->rendstatus |= PDRAW_BORDER_32; + + if ((PicoIn.opt & POPT_EN_SOFTSCALE) && !(*est->PicoOpt & POPT_ALT_RENDERER)) + est->rendstatus |= PDRAW_SOFTSCALE; + + if ((est->Pico->video.reg[12] & 6) == 6) + est->rendstatus |= PDRAW_INTERLACE; // interlace mode + if (!(est->Pico->video.reg[12] & 1)) { + est->rendstatus |= PDRAW_32_COLS; + if (!(est->rendstatus & PDRAW_SOFTSCALE)) { + columns = 256; + coffs = 32; + } + } + if (est->Pico->video.reg[1] & 8) { + est->rendstatus |= PDRAW_30_ROWS; lines = 240; + loffs = 0; } + if (!(est->rendstatus & PDRAW_BORDER_32)) + coffs = 0; - if (rendstatus != rendstatus_old || lines != rendlines) { + if (est->rendstatus != rendstatus_old || lines != rendlines) { rendlines = lines; - // mode_change() might reset rendstatus_old by calling SetColorFormat - emu_video_mode_change((lines == 240) ? 0 : 8, - lines, (Pico.video.reg[12] & 1) ? 
0 : 1); + // mode_change() might reset rendstatus_old by calling SetOutFormat + int rendstatus = est->rendstatus; + emu_video_mode_change(loffs, lines, coffs, columns); rendstatus_old = rendstatus; + // mode_change() might clear buffers, redraw needed + est->rendstatus |= PDRAW_SYNC_NEEDED; } - HighCol = HighColBase + offs * HighColIncrement; - DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; - DrawScanline = 0; + if (sync | skipped) + est->rendstatus |= PDRAW_SYNC_NEEDED; + if (PicoIn.skipFrame) // preserve this until something is rendered at last + est->rendstatus |= PDRAW_SKIP_FRAME; + if (sprep | skipped) + est->rendstatus |= PDRAW_PARSE_SPRITES; + + est->HighCol = HighColBase + loffs * HighColIncrement; + est->DrawLineDest = (char *)DrawLineDestBase + loffs * DrawLineDestIncrement; + est->DrawScanline = 0; skip_next_line = 0; - if (PicoOpt & POPT_ALT_RENDERER) - return; - - if (Pico.m.dirtyPal) - Pico.m.dirtyPal = 2; // reset dirty if needed - PrepareSprites(1); + if (FinalizeLine == FinalizeLine8bit) { + // make a backup of the current palette in case Sonic mode is detected later + est->Pico->m.dirtyPal = (est->Pico->m.dirtyPal || est->SonicPalCount ? 
2 : 0); + blockcpy(est->SonicPal, PicoMem.cram, 0x40*2); + } + est->SonicPalCount = 0; } static void DrawBlankedLine(int line, int offs, int sh, int bgc) { - if (PicoScanBegin != NULL) - PicoScanBegin(line + offs); + struct PicoEState *est = &Pico.est; + int skip = skip_next_line; - BackFill(bgc, sh); - - if (FinalizeLine != NULL) - FinalizeLine(sh, line); - - if (PicoScanEnd != NULL) - PicoScanEnd(line + offs); - - HighCol += HighColIncrement; - DrawLineDest = (char *)DrawLineDest + DrawLineDestIncrement; -} - -static void PicoLine(int line, int offs, int sh, int bgc) -{ - int skip = 0; - - if (skip_next_line > 0) { - skip_next_line--; - return; - } - - DrawScanline = line; - if (PicoScanBegin != NULL) + if (PicoScanBegin != NULL && skip == 0) skip = PicoScanBegin(line + offs); if (skip) { @@ -1480,67 +2070,193 @@ static void PicoLine(int line, int offs, int sh, int bgc) return; } - // Draw screen: - BackFill(bgc, sh); - if (Pico.video.reg[1]&0x40) - DrawDisplay(sh); + BackFill(bgc, sh, est); if (FinalizeLine != NULL) - FinalizeLine(sh, line); + FinalizeLine(sh, line, est); if (PicoScanEnd != NULL) skip_next_line = PicoScanEnd(line + offs); - HighCol += HighColIncrement; - DrawLineDest = (char *)DrawLineDest + DrawLineDestIncrement; + est->HighCol += HighColIncrement; + est->DrawLineDest = (char *)est->DrawLineDest + DrawLineDestIncrement; } -void PicoDrawSync(int to, int blank_last_line) +static void PicoLine(int line, int offs, int sh, int bgc, int off, int on) { - int line, offs = 0; - int sh = (Pico.video.reg[0xC] & 8) >> 3; // shadow/hilight? 
- int bgc = Pico.video.reg[7]; + struct PicoEState *est = &Pico.est; + int skip = skip_next_line; + + est->DrawScanline = line; + if (PicoScanBegin != NULL && skip == 0) + skip = PicoScanBegin(line + offs); + + if (skip) { + skip_next_line = skip - 1; + return; + } + + if (est->Pico->video.debug_p & (PVD_FORCE_A | PVD_FORCE_B | PVD_FORCE_S)) + bgc = 0x3f; + + // Draw screen: + BackFill(bgc, sh, est); + if (est->Pico->video.reg[1]&0x40) { + int width = (est->Pico->video.reg[12]&1) ? 320 : 256; + DrawDisplay(sh); + // partial line blanking (display on or off inside the line) + if (unlikely(off|on)) { + if (off > 0) + memset(est->HighCol+8 + off, bgc, width-off); + if (on > 0) + memset(est->HighCol+8, bgc, on); + } + } + + if (FinalizeLine != NULL) + FinalizeLine(sh, line, est); + + if (PicoScanEnd != NULL) + skip_next_line = PicoScanEnd(line + offs); + + est->HighCol += HighColIncrement; + est->DrawLineDest = (char *)est->DrawLineDest + DrawLineDestIncrement; +} + +void PicoDrawSync(int to, int off, int on) +{ + struct PicoEState *est = &Pico.est; + int line, offs; + int sh = (est->Pico->video.reg[0xC] & 8) >> 3; // shadow/hilight? 
+ int bgc = est->Pico->video.reg[7] & 0x3f; pprof_start(draw); - if (rendlines != 240) - offs = 8; + offs = (240-rendlines) >> 1; + if (to >= rendlines) + to = rendlines-1; - for (line = DrawScanline; line < to; line++) - { - PicoLine(line, offs, sh, bgc); + if (est->DrawScanline <= to && + (est->rendstatus & (PDRAW_DIRTY_SPRITES|PDRAW_PARSE_SPRITES))) + ParseSprites(to + 1, on); + else if (!(est->rendstatus & PDRAW_SYNC_NEEDED)) { + // nothing has changed in VDP/VRAM and buffer is the same -> no sync needed + int count = to+1 - est->DrawScanline; + est->HighCol += count*HighColIncrement; + est->DrawLineDest = (char *)est->DrawLineDest + count*DrawLineDestIncrement; + est->DrawScanline = to+1; + return; } + for (line = est->DrawScanline; line < to; line++) + PicoLine(line, offs, sh, bgc, 0, 0); + // last line if (line <= to) { - if (blank_last_line) - DrawBlankedLine(line, offs, sh, bgc); - else PicoLine(line, offs, sh, bgc); + int width2 = (est->Pico->video.reg[12]&1) ? 160 : 128; + + if (unlikely(on|off) && (off >= width2 || + // hack for timing inaccuracy, if on/off near borders + (off && off <= 24) || (on < width2 && on >= width2-24))) + DrawBlankedLine(line, offs, sh, bgc); + else { + if (on > width2) on = 0; // on, before start of line? + PicoLine(line, offs, sh, bgc, 2*off, 2*on); + } line++; } - DrawScanline = line; + est->DrawScanline = line; pprof_end(draw); } +void PicoDrawRefreshSprites(void) +{ + struct PicoEState *est = &Pico.est; + unsigned char *sprited = &HighLnSpr[est->DrawScanline][0]; + int i; + + if (est->DrawScanline == 0 || est->DrawScanline >= rendlines) return; + + // compute sprite row. The VDP does this by subtracting the sprite y pos from + // the current line and treating the lower 5 bits as the row number. Y pos + // is reread from SAT cache, which may have changed by now (Overdrive 2). 
+ for (i = 0; i < (sprited[0] & 0x7f); i++) { + int num = sprited[4+i] & 0x7f; + s32 *sp = HighPreSpr + 2*num + (sprited[0] & 0x80)*2; + int link = (sp[0]>>16) & 0x7f; + int sy = (CPU_LE2(VdpSATCache[2*link]) & 0x1ff) - 0x80; + if (sy != (s16)sp[0]) { + // Y info in SAT cache has really changed + sy = est->DrawScanline - ((est->DrawScanline - sy) & 0x1f); + sp[0] = (sp[0] & 0xffff0000) | (u16)sy; + } + } +} + +void PicoDrawBgcDMA(u16 *base, u32 source, u32 mask, int dlen, int sl) +{ + struct PicoEState *est = &Pico.est; + int len = (est->Pico->video.reg[12]&1) ? 320 : 256; + int xl = (est->Pico->video.reg[12]&1) ? 38 : 33; // DMA slots during HSYNC + + BgcDMAbase = base; + BgcDMAsrc = source; + BgcDMAmask = mask; + BgcDMAlen = dlen; + BgcDMAoffs = 0; + + // handle slot offset in 1st line + if (sl-12 > 0) + BgcDMAoffs = 2*(sl-12); + else if (sl < 0) { // DMA starts before active display + BgcDMAsrc += 2*-sl; + BgcDMAlen -= 2*-sl; + } + + // skip 1st line if it had been drawn already + if (Pico.est.DrawScanline > Pico.m.scanline) { + len -= BgcDMAoffs; + BgcDMAsrc += (len>>1)+xl; + BgcDMAlen -= (len>>1)+xl; + BgcDMAoffs = 0; + } + if (BgcDMAlen > 0) + est->rendstatus |= PDRAW_BGC_DMA; +} + // also works for fast renderer void PicoDrawUpdateHighPal(void) { - int sh = (Pico.video.reg[0xC] & 8) >> 3; // shadow/hilight? - if (PicoOpt & POPT_ALT_RENDERER) - sh = 0; // no s/h support + struct PicoEState *est = &Pico.est; + if (est->Pico->m.dirtyPal) { + int sh = (est->Pico->video.reg[0xC] & 8) >> 3; // shadow/hilight? + if ((*est->PicoOpt & POPT_ALT_RENDERER) | (est->rendstatus & PDRAW_SONIC_MODE)) + sh = 0; // no s/h support - PicoDoHighPal555(sh); - if (rendstatus & PDRAW_SONIC_MODE) { - // FIXME? 
- memcpy(HighPal + 0x40, HighPal, 0x40*2); - memcpy(HighPal + 0x80, HighPal, 0x40*2); + if (PicoIn.AHW & PAHW_SMS) + PicoDoHighPal555SMS(); + else if (FinalizeLine == FinalizeLine8bit) + PicoDoHighPal555_8bit(sh, 0, est); + else + PicoDoHighPal555(sh, 0, est); + + // cover for sprite priority bits if not in s/h or sonic mode + if (!sh && !(est->rendstatus & PDRAW_SONIC_MODE)) { + blockcpy(est->HighPal+0x40, est->HighPal, 0x40*2); + blockcpy(est->HighPal+0x80, est->HighPal, 0x80*2); + } + est->HighPal[0xe0] = 0x0000; // black and white, reserved for OSD + est->HighPal[0xf0] = 0xffff; } } void PicoDrawSetOutFormat(pdso_t which, int use_32x_line_mode) { + PicoDrawSetInternalBuf(NULL, 0); + PicoDrawSetOutBufMD(NULL, 0); + PicoDraw2SetOutBuf(NULL, 0); switch (which) { case PDF_8BIT: @@ -1548,7 +2264,7 @@ void PicoDrawSetOutFormat(pdso_t which, int use_32x_line_mode) break; case PDF_RGB555: - if ((PicoAHW & PAHW_32X) && use_32x_line_mode) + if ((PicoIn.AHW & PAHW_32X) && use_32x_line_mode) FinalizeLine = FinalizeLine32xRGB555; else FinalizeLine = FinalizeLine555; @@ -1558,29 +2274,56 @@ void PicoDrawSetOutFormat(pdso_t which, int use_32x_line_mode) FinalizeLine = NULL; break; } - PicoDrawSetOutFormat32x(which, use_32x_line_mode); - PicoDrawSetOutputMode4(which); + if (PicoIn.AHW & PAHW_32X) + PicoDrawSetOutFormat32x(which, use_32x_line_mode); + PicoDrawSetOutputSMS(which); rendstatus_old = -1; + Pico.m.dirtyPal = 1; +} + +void PicoDrawSetOutBufMD(void *dest, int increment) +{ + if (FinalizeLine == FinalizeLine8bit && increment >= 328) { + // kludge for no-copy mode, using ALT_RENDERER layout + PicoDrawSetInternalBuf(dest, increment); + } else if (FinalizeLine == NULL) { + PicoDrawSetInternalBuf(dest, increment); // needed for SMS + PicoDraw2SetOutBuf(dest, increment); + } else if (dest != NULL) { + if (dest != DrawLineDestBase) + Pico.est.rendstatus |= PDRAW_SYNC_NEEDED; + DrawLineDestBase = dest; + DrawLineDestIncrement = increment; + Pico.est.DrawLineDest = (char 
*)DrawLineDestBase + Pico.est.DrawScanline * increment; + } else { + DrawLineDestBase = DefOutBuff; + DrawLineDestIncrement = 0; + Pico.est.DrawLineDest = DefOutBuff; + } } // note: may be called on the middle of frame void PicoDrawSetOutBuf(void *dest, int increment) { - DrawLineDestBase = dest; - DrawLineDestIncrement = increment; - DrawLineDest = DrawLineDestBase + DrawScanline * increment; + if (PicoIn.AHW & PAHW_32X) + PicoDrawSetOutBuf32X(dest, increment); + else + PicoDrawSetOutBufMD(dest, increment); } void PicoDrawSetInternalBuf(void *dest, int increment) { if (dest != NULL) { + if (dest != HighColBase) + Pico.est.rendstatus |= PDRAW_SYNC_NEEDED; HighColBase = dest; HighColIncrement = increment; - HighCol = HighColBase + DrawScanline * increment; + Pico.est.HighCol = HighColBase + Pico.est.DrawScanline * increment; } else { HighColBase = DefHighCol; HighColIncrement = 0; + Pico.est.HighCol = DefHighCol; } } @@ -1591,7 +2334,7 @@ void PicoDrawSetCallbacks(int (*begin)(unsigned int num), int (*end)(unsigned in PicoScan32xBegin = NULL; PicoScan32xEnd = NULL; - if ((PicoAHW & PAHW_32X) && FinalizeLine != FinalizeLine32xRGB555) { + if ((PicoIn.AHW & PAHW_32X) && FinalizeLine != FinalizeLine32xRGB555) { PicoScan32xBegin = begin; PicoScan32xEnd = end; } @@ -1600,3 +2343,12 @@ void PicoDrawSetCallbacks(int (*begin)(unsigned int num), int (*end)(unsigned in PicoScanEnd = end; } } + +void PicoDrawInit(void) +{ + Pico.est.DrawLineDest = DefOutBuff; + Pico.est.HighCol = HighColBase; + rendstatus_old = -1; +} + +// vim:ts=2:sw=2:expandtab diff --git a/pico/draw2.c b/pico/draw2.c index 5730d7b6..b4d0ec76 100644 --- a/pico/draw2.c +++ b/pico/draw2.c @@ -1,6 +1,7 @@ /* * tile renderer * (C) notaz, 2006-2008 + * (C) irixxxx, 2020-2023 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -11,6 +12,9 @@ #define START_ROW 0 // which row of tiles to start rendering at? 
#define END_ROW 28 // ..end +#define VSRAM 0 // 2-cell vscroll (broken for line based hscroll) +#define INTERLACE 0 // interlace mode 2 + #define TILE_ROWS END_ROW-START_ROW // note: this is not implemented in ARM asm @@ -20,33 +24,37 @@ #define LINE_WIDTH 328 #endif -static unsigned char PicoDraw2FB_[(8+320) * (8+240+8)]; -unsigned char *PicoDraw2FB = PicoDraw2FB_; +static unsigned char PicoDraw2FB_[LINE_WIDTH * (8+240+8) + 8]; -static int HighCache2A[41*(TILE_ROWS+1)+1+1]; // caches for high layers -static int HighCache2B[41*(TILE_ROWS+1)+1+1]; +static u32 HighCache2A[2*41*(TILE_ROWS+1)+1+1]; // caches for high layers +static u32 HighCache2B[2*41*(TILE_ROWS+1)+1+1]; -unsigned short *PicoCramHigh=Pico.cram; // pointer to CRAM buff (0x40 shorts), converted to native device color (works only with 16bit for now) -void (*PicoPrepareCram)()=0; // prepares PicoCramHigh for renderer to use +unsigned short *PicoCramHigh=PicoMem.cram; // pointer to CRAM buff (0x40 shorts), converted to native device color (works only with 16bit for now) +void (*PicoPrepareCram)(void) = NULL; // prepares PicoCramHigh for renderer to use // stuff available in asm: #ifdef _ASM_DRAW_C -void BackFillFull(int reg7); -void DrawLayerFull(int plane, int *hcache, int planestart, int planeend); -void DrawTilesFromCacheF(int *hc); -void DrawWindowFull(int start, int end, int prio); -void DrawSpriteFull(unsigned int *sprite); +void BackFillFull(void *dst, int reg7, int lwidth); +void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend, + struct PicoEState *est); +void DrawTilesFromCacheF(u32 *hc, struct PicoEState *est); +void DrawWindowFull(int start, int end, int prio, struct PicoEState *est); +void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est); #else -static int TileXnormYnorm(unsigned char *pd,int addr,unsigned char pal) +static int TileXnormYnorm(unsigned char *pd,int addr,unsigned char pal, struct PicoEState *est) { unsigned int pack=0; unsigned int t=0, blank = 1; - 
int i; + unsigned short *vram = est->PicoMem_vram; + int i, inc=2; - for(i=8; i; i--, addr+=2, pd += LINE_WIDTH) { - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) inc = 4; +#endif + for(i=8; i; i--, addr+=inc, pd += est->Draw2Width) { + pack=*(u32 *)(vram+addr); // Get 8 pixels if(!pack) continue; t=pack&0x0000f000; if (t) pd[0]=(unsigned char)((t>>12)|pal); @@ -63,13 +71,17 @@ static int TileXnormYnorm(unsigned char *pd,int addr,unsigned char pal) return blank; // Tile blank? } -static int TileXflipYnorm(unsigned char *pd,int addr,unsigned char pal) +static int TileXflipYnorm(unsigned char *pd,int addr,unsigned char pal, struct PicoEState *est) { unsigned int pack=0; unsigned int t=0, blank = 1; - int i; + unsigned short *vram = est->PicoMem_vram; + int i, inc=2; - for(i=8; i; i--, addr+=2, pd += LINE_WIDTH) { - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) inc = 4; +#endif + for(i=8; i; i--, addr+=inc, pd += est->Draw2Width) { + pack=*(u32 *)(vram+addr); // Get 8 pixels if(!pack) continue; t=pack&0x000f0000; if (t) pd[0]=(unsigned char)((t>>16)|pal); @@ -85,14 +97,18 @@ static int TileXflipYnorm(unsigned char *pd,int addr,unsigned char pal) return blank; // Tile blank? 
} -static int TileXnormYflip(unsigned char *pd,int addr,unsigned char pal) +static int TileXnormYflip(unsigned char *pd,int addr,unsigned char pal, struct PicoEState *est) { unsigned int pack=0; unsigned int t=0, blank = 1; - int i; + unsigned short *vram = est->PicoMem_vram; + int i, inc=2; +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) inc = 4, addr += 16; +#endif addr+=14; - for(i=8; i; i--, addr-=2, pd += LINE_WIDTH) { - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + for(i=8; i; i--, addr-=inc, pd += est->Draw2Width) { + pack=*(u32 *)(vram+addr); // Get 8 pixels if(!pack) continue; t=pack&0x0000f000; if (t) pd[0]=(unsigned char)((t>>12)|pal); @@ -109,14 +125,18 @@ static int TileXnormYflip(unsigned char *pd,int addr,unsigned char pal) return blank; // Tile blank? } -static int TileXflipYflip(unsigned char *pd,int addr,unsigned char pal) +static int TileXflipYflip(unsigned char *pd,int addr,unsigned char pal, struct PicoEState *est) { unsigned int pack=0; unsigned int t=0, blank = 1; - int i; + unsigned short *vram = est->PicoMem_vram; + int i, inc=2; +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) inc = 4, addr += 16; +#endif addr+=14; - for(i=8; i; i--, addr-=2, pd += LINE_WIDTH) { - pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + for(i=8; i; i--, addr-=inc, pd += est->Draw2Width) { + pack=*(u32 *)(vram+addr); // Get 8 pixels if(!pack) continue; t=pack&0x000f0000; if (t) pd[0]=(unsigned char)((t>>16)|pal); @@ -134,11 +154,12 @@ static int TileXflipYflip(unsigned char *pd,int addr,unsigned char pal) // start: (tile_start<<16)|row_start, end: [same] -static void DrawWindowFull(int start, int end, int prio) +static void DrawWindowFull(int start, int end, int prio, struct PicoEState *est) { - struct PicoVideo *pvid=&Pico.video; + struct PicoVideo *pvid=&est->Pico->video; int nametab, nametab_step, trow, tilex, blank=-1, code; - unsigned char *scrpos = PicoDraw2FB; + unsigned char *scrpos = est->Draw2FB; + int scrstart = 
est->Draw2Start; int tile_start, tile_end; // in cells // parse ranges @@ -148,7 +169,7 @@ static void DrawWindowFull(int start, int end, int prio) end = end<<16>>16; // Find name table line: - if (pvid->reg[12]&1) + if (!(est->rendstatus & PDRAW_32_COLS)) { nametab=(pvid->reg[3]&0x3c)<<9; // 40-cell mode nametab_step = 1<<6; @@ -157,15 +178,17 @@ static void DrawWindowFull(int start, int end, int prio) { nametab=(pvid->reg[3]&0x3e)<<9; // 32-cell mode nametab_step = 1<<5; + if (est->rendstatus & PDRAW_BORDER_32) + scrpos += 32; } - nametab += nametab_step*start; + nametab += nametab_step*(start-scrstart); // check priority - code=Pico.vram[nametab+tile_start]; + code=est->PicoMem_vram[nametab+tile_start]; if ((code>>15) != prio) return; // hack: just assume that whole window uses same priority - scrpos+=8*LINE_WIDTH+8; - scrpos+=8*LINE_WIDTH*(start-START_ROW); + scrpos+=8*est->Draw2Width+8; + scrpos+=8*est->Draw2Width*(start-scrstart); // do a window until we reach planestart row for(trow = start; trow < end; trow++, nametab+=nametab_step) { // current tile row @@ -175,36 +198,43 @@ static void DrawWindowFull(int start, int end, int prio) // unsigned short *pal=NULL; unsigned char pal; - code=Pico.vram[nametab+tilex]; + code=est->PicoMem_vram[nametab+tilex]; if (code==blank) continue; // Get tile address/2: - addr=(code&0x7ff)<<4; +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) + addr=(code&0x3ff)<<5; + else +#endif + addr=(code&0x7ff)<<4; // pal=PicoCramHigh+((code>>9)&0x30); pal=(unsigned char)((code>>9)&0x30); switch((code>>11)&3) { - case 0: zero=TileXnormYnorm(scrpos+(tilex<<3),addr,pal); break; - case 1: zero=TileXflipYnorm(scrpos+(tilex<<3),addr,pal); break; - case 2: zero=TileXnormYflip(scrpos+(tilex<<3),addr,pal); break; - case 3: zero=TileXflipYflip(scrpos+(tilex<<3),addr,pal); break; + case 0: zero=TileXnormYnorm(scrpos+(tilex<<3),addr,pal,est); break; + case 1: zero=TileXflipYnorm(scrpos+(tilex<<3),addr,pal,est); break; + case 2: 
zero=TileXnormYflip(scrpos+(tilex<<3),addr,pal,est); break; + case 3: zero=TileXflipYflip(scrpos+(tilex<<3),addr,pal,est); break; } if(zero) blank=code; // We know this tile is blank now } - scrpos += LINE_WIDTH*8; + scrpos += est->Draw2Width*8; } } -static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend) +static void DrawLayerFull(int plane, u32 *hcache, int planestart, int planeend, + struct PicoEState *est) { - struct PicoVideo *pvid=&Pico.video; - static char shift[4]={5,6,6,7}; // 32,64 or 128 sized tilemaps + struct PicoVideo *pvid=&est->Pico->video; + static char shift[4]={5,6,5,7}; // 32,64 or 128 sized tilemaps int width, height, ymask, htab; int nametab, hscroll=0, vscroll, cells; unsigned char *scrpos; + int scrstart = est->Draw2Start; int blank=-1, xmask, nametab_row, trow; // parse ranges @@ -221,7 +251,7 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend) if(!(pvid->reg[11]&3)) { // full screen scroll // Get horizontal scroll value - hscroll=Pico.vram[htab&0x7fff]; + hscroll=est->PicoMem_vram[htab&0x7fff]; htab = 0; // this marks that we don't have to update scroll value } @@ -238,104 +268,138 @@ static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend) if (plane==0) nametab=(pvid->reg[2]&0x38)<< 9; // A else nametab=(pvid->reg[4]&0x07)<<12; // B - scrpos = PicoDraw2FB; - scrpos+=8*LINE_WIDTH*(planestart-START_ROW); - - // Get vertical scroll value: - vscroll=Pico.vsram[plane]&0x1ff; - scrpos+=(8-(vscroll&7))*LINE_WIDTH; - if(vscroll&7) planeend++; // we have vertically clipped tiles due to vscroll, so we need 1 more row - - *hcache++ = 8-(vscroll&7); // push y-offset to tilecache - + scrpos = est->Draw2FB; + if ((~est->rendstatus & (PDRAW_BORDER_32|PDRAW_32_COLS)) == 0) + scrpos += 32; + scrpos+=8*est->Draw2Width*(planestart-scrstart); + if((pvid->reg[11]&4)||(PicoMem.vsram[plane]&7)) + planeend++; // we (may) have vertically clipped tiles due to vscroll, so we need 1 more 
row for(trow = planestart; trow < planeend; trow++) { // current tile row - int cellc=cells,tilex,dx; + int cellc=cells,tilex,dx,vsidx=0; + + // Get vertical scroll value: + vscroll=PicoMem.vsram[plane];//&0x1ff; +#if VSRAM + if ((est->rendstatus & PDRAW_32_COLS) && (pvid->reg[11]&4)) // H32 + 2-cell mode + vscroll=PicoMem.vsram[plane+0x20];//&0x1ff; +#endif +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) + vscroll >>= 1; +#endif + nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<>3)<>3))&ymask)<PicoMem_vram[htaddr&0x7fff]; } // Draw tiles across screen: tilex=(-hscroll)>>3; dx=((hscroll-1)&7)+1; - if(dx != 8) cellc++; // have hscroll, do more cells + if(dx != 8) cellc++, vsidx--; // have hscroll, do more cells for (; cellc; dx+=8,tilex++,cellc--) { - int code=0,addr=0,zero=0; + int code=0,addr=0,zero=0,scroff; // unsigned short *pal=NULL; unsigned char pal; - code=Pico.vram[nametab_row+(tilex&xmask)]; +#if VSRAM + if ((pvid->reg[11]&4) && !(vsidx&1)) { // 2-cell mode + vscroll=PicoMem.vsram[vsidx+plane];//&0x1ff; +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) + vscroll >>= 1; +#endif + nametab_row = nametab + (((trow+(vscroll>>3))&ymask)<PicoMem_vram[nametab_row+(tilex&xmask)]; if (code==blank) continue; if (code>>15) { // high priority tile *hcache++ = code|(dx<<16)|(trow<<27); // cache it + *hcache++ = 8-(vscroll&7); // push y-offset to tilecache continue; } // Get tile address/2: - addr=(code&0x7ff)<<4; +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) + addr=(code&0x3ff)<<5; + else +#endif + addr=(code&0x7ff)<<4; // pal=PicoCramHigh+((code>>9)&0x30); pal=(unsigned char)((code>>9)&0x30); + scroff=(8-(vscroll&7))*est->Draw2Width; switch((code>>11)&3) { - case 0: zero=TileXnormYnorm(scrpos+dx,addr,pal); break; - case 1: zero=TileXflipYnorm(scrpos+dx,addr,pal); break; - case 2: zero=TileXnormYflip(scrpos+dx,addr,pal); break; - case 3: zero=TileXflipYflip(scrpos+dx,addr,pal); break; + case 0: 
zero=TileXnormYnorm(scrpos+scroff+dx,addr,pal,est); break; + case 1: zero=TileXflipYnorm(scrpos+scroff+dx,addr,pal,est); break; + case 2: zero=TileXnormYflip(scrpos+scroff+dx,addr,pal,est); break; + case 3: zero=TileXflipYflip(scrpos+scroff+dx,addr,pal,est); break; } if(zero) blank=code; // We know this tile is blank now } - scrpos += LINE_WIDTH*8; + scrpos += est->Draw2Width*8; } *hcache = 0; // terminate cache } -static void DrawTilesFromCacheF(int *hc) +static void DrawTilesFromCacheF(u32 *hc, struct PicoEState *est) { - int code, addr, zero = 0; + u32 code; + int addr, zero = 0, vscroll; unsigned int prevy=0xFFFFFFFF; // unsigned short *pal; unsigned char pal; short blank=-1; // The tile we know is blank - unsigned char *scrpos = PicoDraw2FB, *pd = 0; + unsigned char *scrpos = est->Draw2FB, *pd = 0; + int scrstart = est->Draw2Start; - // *hcache++ = code|(dx<<16)|(trow<<27); // cache it - scrpos+=(*hc++)*LINE_WIDTH - START_ROW*LINE_WIDTH*8; + if ((~est->rendstatus & (PDRAW_BORDER_32|PDRAW_32_COLS)) == 0) + scrpos += 32; while((code=*hc++)) { + vscroll=(*hc++ - START_ROW*8)*est->Draw2Width; if((short)code == blank) continue; // y pos if(((unsigned)code>>27) != prevy) { prevy = (unsigned)code>>27; - pd = scrpos + prevy*LINE_WIDTH*8; + pd = scrpos + (prevy-scrstart)*est->Draw2Width*8; } // Get tile address/2: - addr=(code&0x7ff)<<4; +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) + addr=(code&0x3ff)<<5; + else +#endif + addr=(code&0x7ff)<<4; // pal=PicoCramHigh+((code>>9)&0x30); pal=(unsigned char)((code>>9)&0x30); switch((code>>11)&3) { - case 0: zero=TileXnormYnorm(pd+((code>>16)&0x1ff),addr,pal); break; - case 1: zero=TileXflipYnorm(pd+((code>>16)&0x1ff),addr,pal); break; - case 2: zero=TileXnormYflip(pd+((code>>16)&0x1ff),addr,pal); break; - case 3: zero=TileXflipYflip(pd+((code>>16)&0x1ff),addr,pal); break; + case 0: zero=TileXnormYnorm(pd+vscroll+((code>>16)&0x1ff),addr,pal,est); break; + case 1: 
zero=TileXflipYnorm(pd+vscroll+((code>>16)&0x1ff),addr,pal,est); break; + case 2: zero=TileXnormYflip(pd+vscroll+((code>>16)&0x1ff),addr,pal,est); break; + case 3: zero=TileXflipYflip(pd+vscroll+((code>>16)&0x1ff),addr,pal,est); break; } if(zero) blank=(short)code; @@ -344,18 +408,24 @@ static void DrawTilesFromCacheF(int *hc) // sx and sy are coords of virtual screen with 8pix borders on top and on left -static void DrawSpriteFull(unsigned int *sprite) +static void DrawSpriteFull(u32 *sprite, struct PicoEState *est) { int width=0,height=0; // unsigned short *pal=NULL; unsigned char pal; int tile,code,tdeltax,tdeltay; unsigned char *scrpos; + int scrstart = est->Draw2Start; int sx, sy; sy=sprite[0]; height=sy>>24; - sy=(sy&0x1ff)-0x78; // Y +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) + sy = ((sy>>1)&0x1ff)-0x78; + else +#endif + sy=(sy&0x1ff)-0x78; // Y width=(height>>2)&3; height&=3; width++; height++; // Width and height in tiles @@ -365,22 +435,25 @@ static void DrawSpriteFull(unsigned int *sprite) tile=code&0x7ff; // Tile number tdeltax=height; // Delta to increase tile by going right tdeltay=1; // Delta to increase tile by going down - if (code&0x0800) { tdeltax=-tdeltax; tile+=height*(width-1); } // Flip X - if (code&0x1000) { tdeltay=-tdeltay; tile+=height-1; } // Flip Y + if (code&0x1000) { tile+=tdeltax-1; tdeltay=-tdeltay; } // Flip Y + if (code&0x0800) { tile+=tdeltax*(width-1); tdeltax=-tdeltax; } // Flip X //delta<<=4; // Delta of address // pal=PicoCramHigh+((code>>9)&0x30); // Get palette pointer pal=(unsigned char)((code>>9)&0x30); // goto first vertically visible tile - while(sy <= START_ROW*8) { sy+=8; tile+=tdeltay; height--; } + sy -= scrstart*8; + while(sy <= 0) { sy+=8; tile+=tdeltay; height--; } - scrpos = PicoDraw2FB; - scrpos+=(sy-START_ROW*8)*LINE_WIDTH; + scrpos = est->Draw2FB; + if ((~est->rendstatus & (PDRAW_BORDER_32|PDRAW_32_COLS)) == 0) + scrpos += 32; + scrpos+=sy*est->Draw2Width; for (; height > 0; height--, sy+=8, 
tile+=tdeltay) { - int w = width, x=sx, t=tile; + int w = width, x=sx, t=tile, s; if(sy >= END_ROW*8+8) return; // offscreen @@ -390,44 +463,59 @@ static void DrawSpriteFull(unsigned int *sprite) if(x>=328) break; // Offscreen t&=0x7fff; // Clip tile address +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) + s=5; + else +#endif + s=4; switch((code>>11)&3) { - case 0: TileXnormYnorm(scrpos+x,t<<4,pal); break; - case 1: TileXflipYnorm(scrpos+x,t<<4,pal); break; - case 2: TileXnormYflip(scrpos+x,t<<4,pal); break; - case 3: TileXflipYflip(scrpos+x,t<<4,pal); break; + case 0: TileXnormYnorm(scrpos+x,t<Draw2Width; } } #endif -static void DrawAllSpritesFull(int prio, int maxwidth) +static void DrawAllSpritesFull(int prio, int maxwidth, struct PicoEState *est) { - struct PicoVideo *pvid=&Pico.video; + struct PicoVideo *pvid=&est->Pico->video; int table=0,maskrange=0; int i,u,link=0; - unsigned int *sprites[80]; // Sprites + u32 *sprites[80]; // Sprites int y_min=START_ROW*8, y_max=END_ROW*8; // for a simple sprite masking + int max_sprites = !(est->rendstatus & PDRAW_32_COLS) ? 
80 : 64; + + if (est->rendstatus & PDRAW_30_ROWS) + y_min += 8, y_max += 8; table=pvid->reg[5]&0x7f; - if (pvid->reg[12]&1) table&=0x7e; // Lowest bit 0 in 40-cell mode + if (!(est->rendstatus & PDRAW_32_COLS)) table&=0x7e; // Lowest bit 0 in 40-cell mode table<<=8; // Get sprite table address/2 - for (i=u=0; u < 80; u++) + for (i = u = 0; u < max_sprites && link < max_sprites; u++) { - unsigned int *sprite=NULL; + u32 *sprite=NULL; int code, code2, sx, sy, height; - sprite=(unsigned int *)(Pico.vram+((table+(link<<2))&0x7ffc)); // Find sprite + sprite=(u32 *)(est->PicoMem_vram+((table+(link<<2))&0x7ffc)); // Find sprite // get sprite info code = sprite[0]; // check if it is not hidden vertically - sy = (code&0x1ff)-0x80; +#if INTERLACE + if (est->rendstatus & PDRAW_INTERLACE) + sy = ((code>>1)&0x1ff)-0x80; + else +#endif + sy = (code&0x1ff)-0x80; height = (((code>>24)&3)+1)<<3; if(sy+height <= y_min || sy > y_max) goto nextsprite; @@ -466,40 +554,52 @@ static void DrawAllSpritesFull(int prio, int maxwidth) } // Go through sprites backwards: - for (i-- ;i>=0; i--) + for (i--; i >= 0; i--) { - DrawSpriteFull(sprites[i]); + DrawSpriteFull(sprites[i], est); } } #ifndef _ASM_DRAW_C -static void BackFillFull(int reg7) +static void BackFillFull(unsigned char *dst, int reg7, int lwidth) { unsigned int back; + int i; // Start with a background color: -// back=PicoCramHigh[reg7&0x3f]; back=reg7&0x3f; back|=back<<8; back|=back<<16; - memset32((int *)PicoDraw2FB, back, LINE_WIDTH*(8+(END_ROW-START_ROW)*8)/4); + for (i = 0, dst += 8*lwidth; i < (END_ROW-START_ROW)*8; i++, dst += lwidth) + memset32(dst+8, back, 320/4); } #endif static void DrawDisplayFull(void) { - struct PicoVideo *pvid=&Pico.video; + struct PicoEState *est = &Pico.est; + struct PicoVideo *pvid=&est->Pico->video; int win, edge=0, hvwin=0; // LSb->MSb: hwin&plane, vwin&plane, full - int planestart=START_ROW, planeend=END_ROW; // plane A start/end when window shares display with plane A (in tile rows or columns) 
- int winstart=START_ROW, winend=END_ROW; // same for window + int scrstart=START_ROW, scrend = END_ROW; // our render area + int planestart, planeend; // plane A start/end when window shares display with plane A (in tile rows or columns) + int winstart, winend; // same for window int maxw, maxcolc; // max width and col cells - if(pvid->reg[12]&1) { - maxw = 328; maxcolc = 40; - } else { + if(est->rendstatus & PDRAW_32_COLS) { maxw = 264; maxcolc = 32; + } else { + maxw = 328; maxcolc = 40; } + if(est->rendstatus & PDRAW_30_ROWS) { + // In 240 line mode, the top and bottom 8 lines are omitted + // since this renderer always renders 224 lines + scrstart ++, scrend ++; + } + est->Draw2Start = scrstart; + + planestart = scrstart, planeend = scrend; + winstart = scrstart, winend = scrend; // horizontal window? if ((win=pvid->reg[0x12])) @@ -511,12 +611,12 @@ static void DrawDisplayFull(void) hvwin=4; } else if(win < 0x80) { // window on the top - if(edge <= START_ROW) hvwin=0; // window not visible in our drawing region - else if(edge >= END_ROW) hvwin=4; + if(edge <= scrstart) hvwin=0; // window not visible in our drawing region + else if(edge >= scrend) hvwin=4; else planestart = winend = edge; } else if(win > 0x80) { // window at the bottom - if(edge >= END_ROW) hvwin=0; + if(edge >= scrend) hvwin=0; else planeend = winstart = edge; } } @@ -549,65 +649,65 @@ static void DrawDisplayFull(void) if (hvwin==1) { winend|=maxcolc<<16; planeend|=maxcolc<<16; } - HighCache2A[1] = HighCache2B[1] = 0; - if (PicoDrawMask & PDRAW_LAYERB_ON) - DrawLayerFull(1, HighCache2B, START_ROW, (maxcolc<<16)|END_ROW); - if (PicoDrawMask & PDRAW_LAYERA_ON) switch (hvwin) + HighCache2A[0] = HighCache2B[0] = 0; + if (!(pvid->debug_p & PVD_KILL_B)) + DrawLayerFull(1, HighCache2B, scrstart, (maxcolc<<16)|scrend, est); + if (!(pvid->debug_p & PVD_KILL_A)) switch (hvwin) { case 4: // fullscreen window - DrawWindowFull(START_ROW, (maxcolc<<16)|END_ROW, 0); + DrawWindowFull(scrstart, 
(maxcolc<<16)|scrend, 0, est); break; case 3: // we have plane A and both v and h windows - DrawLayerFull(0, HighCache2A, planestart, planeend); - DrawWindowFull( winstart&~0xff0000, (winend&~0xff0000)|(maxcolc<<16), 0); // h - DrawWindowFull((winstart&~0xff)|START_ROW, (winend&~0xff)|END_ROW, 0); // v + DrawLayerFull(0, HighCache2A, planestart, planeend, est); + DrawWindowFull( winstart&~0xff0000, (winend&~0xff0000)|(maxcolc<<16), 0, est); // h + DrawWindowFull((winstart&~0xff)|scrstart, (winend&~0xff)|scrend, 0, est); // v break; case 2: case 1: // both window and plane A visible, window is vertical XOR horizontal - DrawLayerFull(0, HighCache2A, planestart, planeend); - DrawWindowFull(winstart, winend, 0); + DrawLayerFull(0, HighCache2A, planestart, planeend, est); + DrawWindowFull(winstart, winend, 0, est); break; default: // fullscreen plane A - DrawLayerFull(0, HighCache2A, START_ROW, (maxcolc<<16)|END_ROW); + DrawLayerFull(0, HighCache2A, scrstart, (maxcolc<<16)|scrend, est); break; } - if (PicoDrawMask & PDRAW_SPRITES_LOW_ON) - DrawAllSpritesFull(0, maxw); + if (!(pvid->debug_p & PVD_KILL_S_LO)) + DrawAllSpritesFull(0, maxw, est); - if (HighCache2B[1]) DrawTilesFromCacheF(HighCache2B); - if (HighCache2A[1]) DrawTilesFromCacheF(HighCache2A); - if (PicoDrawMask & PDRAW_LAYERA_ON) switch (hvwin) + if (HighCache2B[0]) DrawTilesFromCacheF(HighCache2B, est); + if (HighCache2A[0]) DrawTilesFromCacheF(HighCache2A, est); + if (!(pvid->debug_p & PVD_KILL_A)) switch (hvwin) { case 4: // fullscreen window - DrawWindowFull(START_ROW, (maxcolc<<16)|END_ROW, 1); + DrawWindowFull(scrstart, (maxcolc<<16)|scrend, 1, est); break; case 3: // we have plane A and both v and h windows - DrawWindowFull( winstart&~0xff0000, (winend&~0xff0000)|(maxcolc<<16), 1); // h - DrawWindowFull((winstart&~0xff)|START_ROW, (winend&~0xff)|END_ROW, 1); // v + DrawWindowFull( winstart&~0xff0000, (winend&~0xff0000)|(maxcolc<<16), 1, est); // h + DrawWindowFull((winstart&~0xff)|scrstart, 
(winend&~0xff)|scrend, 1, est); // v break; case 2: case 1: // both window and plane A visible, window is vertical XOR horizontal - DrawWindowFull(winstart, winend, 1); + DrawWindowFull(winstart, winend, 1, est); break; } - if (PicoDrawMask & PDRAW_SPRITES_HI_ON) - DrawAllSpritesFull(1, maxw); + if (!(pvid->debug_p & PVD_KILL_S_HI)) + DrawAllSpritesFull(1, maxw, est); } -PICO_INTERNAL void PicoFrameFull() +PICO_INTERNAL void PicoFrameFull(void) { pprof_start(draw); @@ -615,10 +715,37 @@ PICO_INTERNAL void PicoFrameFull() if (PicoPrepareCram) PicoPrepareCram(); // Draw screen: - BackFillFull(Pico.video.reg[7]); + BackFillFull(Pico.est.Draw2FB, Pico.video.reg[7], Pico.est.Draw2Width); if (Pico.video.reg[1] & 0x40) DrawDisplayFull(); + // clear top and bottom 8 lines in 240 mode, since draw2 only does 224 + if (Pico.est.rendstatus & PDRAW_30_ROWS) { + unsigned char *pd = Pico.est.Draw2FB; + int i; + + for (i = 8; i > 0; i--, pd += Pico.est.Draw2Width) + memset32((int *)pd, 0xe0e0e0e0, 328/4); + pd += Pico.est.Draw2Width*(END_ROW-START_ROW)*8; + for (i = 8; i > 0; i--, pd += Pico.est.Draw2Width) + memset32((int *)pd, 0xe0e0e0e0, 328/4); + } + pprof_end(draw); } +void PicoDraw2SetOutBuf(void *dest, int incr) +{ + if (dest) { + Pico.est.Draw2FB = dest; + Pico.est.Draw2Width = incr; + } else { + Pico.est.Draw2FB = PicoDraw2FB_; + Pico.est.Draw2Width = LINE_WIDTH; + } +} + +void PicoDraw2Init(void) +{ + PicoDraw2SetOutBuf(NULL, 0); +} diff --git a/pico/draw2_arm.S b/pico/draw2_arm.S index c37d059a..aa630244 100644 --- a/pico/draw2_arm.S +++ b/pico/draw2_arm.S @@ -1,15 +1,22 @@ /* * assembly optimized versions of most funtions from draw2.c * (C) notaz, 2006-2008 + * (C) irixxxx, 2019-2023 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. * * this is highly specialized, be careful if changing related C code! 
+ * + * NB: this only deals with buffers having line width at 328 */ -.extern Pico -.extern PicoDraw2FB +#include "pico_int_offs.h" + +.equ PDRAW_INTERLACE, (1<<3) +.equ PDRAW_32_COLS, (1<<8) +.equ PDRAW_BORDER_32, (1<<9) +.equ PDRAW_30_ROWS, (1<<11) @ define these constants in your include file: @ .equiv START_ROW, 1 @@ -25,16 +32,19 @@ .text .align 2 -.global BackFillFull @ int reg7 +@ void BackFillFull(unsigned char *dst, int reg7, int lwidth) + +.global BackFillFull BackFillFull: - stmfd sp!, {r4-r9,lr} + stmfd sp!, {r4-r10,lr} - ldr lr, =PicoDraw2FB @ lr=PicoDraw2FB - mov r0, r0, lsl #26 - ldr lr, [lr] + sub r10,r2, #320 @ unused bytes in a line + add lr, r0, #8 @ 8 px overlap area at start of line + add lr, lr, r2, lsl #3 @ 8 lines overlap area at top + + mov r0, r1, lsl #26 mov r0, r0, lsr #26 - add lr, lr, #328*8 orr r0, r0, r0, lsl #8 orr r0, r0, r0, lsl #16 @@ -53,7 +63,6 @@ BackFillFull: @ go go go! .bff_loop: - add lr, lr, #8 subs r12, r12, #1 stmia lr!, {r0-r9} @ 10*4*8 @@ -65,17 +74,27 @@ BackFillFull: stmia lr!, {r0-r9} stmia lr!, {r0-r9} + add lr, lr, r10 @ skip unused rest of line bne .bff_loop - ldmfd sp!, {r4-r9,r12} - bx r12 + ldmfd sp!, {r4-r10,lr} + bx lr .pool @ -------- some macros -------- +@ helpers +.macro add_c24 d s c + add \d, \s, #(\c & 0x00ff00) +.if \c & 0x0000ff + add \d, \d, #(\c & 0x0000ff) +.endif +.if \c & 0xff0000 + add \d, \d, #(\c & 0xff0000) +.endif +.endm -@ helper @ TileLineSinglecol (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r0: pixels8_old .macro TileLineSinglecol notsinglecol=0 and r2, r2, #0xf @ #0x0000000f @@ -343,19 +362,21 @@ BackFillFull: @ DrawLayerTiles(*hcache, *scrpos, (cells<<24)|(nametab<<9)|(vscroll&0x3ff)<<11|(shift[width]<<8)|planeend, (ymask<<24)|(planestart<<16)|[htab||hscroll] -@static void DrawLayerFull(int plane, int *hcache, int planestart, int planeend) +@ void DrawLayerFull(int plane, int *hcache, int planestart, int planeend, +@ struct PicoEState *est) .global DrawLayerFull DrawLayerFull: + ldr 
r12,[sp] @ est stmfd sp!, {r4-r11,lr} mov r6, r1 @ hcache - ldr r11, =(Pico+0x22228) @ Pico.video - ldr r10, =(Pico+0x10000) @ r10=Pico.vram - ldrb r5, [r11, #13] @ pvid->reg[13] - ldrb r7, [r11, #11] + ldr r11, [r12, #OFS_EST_Pico] + ldr r10, [r12, #OFS_EST_PicoMem_vram] + ldrb r5, [r11, #OFS_Pico_video_reg+13] @ pvid->reg[13] + ldrb r7, [r11, #OFS_Pico_video_reg+11] sub lr, r3, r2 and lr, lr, #0x00ff0000 @ lr=cells @@ -365,7 +386,7 @@ DrawLayerFull: bic r5, r5, #0x00ff0000 @ just in case tst r7, #3 @ full screen scroll? (if ==0) - ldrb r7, [r11, #16] @ ??hh??ww + ldrb r7, [r11, #OFS_Pico_video_reg+16] @ ??hh??ww ldreqh r5, [r10, r5] biceq r5, r5, #0x0000fc00 @ r5=hscroll (0-0x3ff) movne r5, r5, lsr #1 @@ -396,22 +417,26 @@ DrawLayerFull: @ Find name table: tst r0, r0 - ldreqb r4, [r11, #2] + ldreqb r4, [r11, #OFS_Pico_video_reg+2] moveq r4, r4, lsr #3 - ldrneb r4, [r11, #4] + ldrneb r4, [r11, #OFS_Pico_video_reg+4] and r4, r4, #7 orr lr, lr, r4, lsl #13 @ lr|=nametab_bits{3}<<13 - ldr r11, =PicoDraw2FB @ r11=PicoDraw2FB - sub r4, r9, #(START_ROW<<24) - ldr r11, [r11] + ldr r11,[sp, #9*4] @ est + ldr r4, [r11, #OFS_EST_Draw2Start] + ldr r7, [r11, #OFS_EST_rendstatus] + ldr r11, [r11, #OFS_EST_Draw2FB] + sub r4, r9, r4, lsl #24 + tst r7, #PDRAW_BORDER_32 @ H32 border mode? 
+ tstne r7, #PDRAW_32_COLS + addne r11, r11, #32 mov r4, r4, asr #24 mov r7, #328*8 - mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-START_ROW); + mla r11, r4, r7, r11 @ scrpos+=8*328*(planestart-Draw2Start); @ Get vertical scroll value: - add r7, r10, #0x012000 - add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180) + add_c24 r7, r10, (OFS_PMEM_vsram-OFS_PMEM_vram) ldr r7, [r7] tst r0, r0 moveq r7, r7, lsl #22 @@ -571,23 +596,28 @@ DrawLayerFull: .pool +@ void DrawTilesFromCacheF(int *hc, struct PicoEState *est) -.global DrawTilesFromCacheF @ int *hc +.global DrawTilesFromCacheF DrawTilesFromCacheF: - stmfd sp!, {r4-r10,lr} + stmfd sp!, {r4-r11,lr} mov r9, #0xff000000 @ r9=prevcode=-1 mvn r6, #0 @ r6=prevy=-1 - ldr r4, =PicoDraw2FB @ r4=PicoDraw2FB - ldr r1, [r0], #4 @ read y offset - ldr r4, [r4] + ldr r7, [r1, #OFS_EST_rendstatus] + ldr r4, [r1, #OFS_EST_Draw2FB] + ldr r11,[r1, #OFS_EST_Draw2Start] + ldr r2, [r0], #4 @ read y offset + tst r7, #PDRAW_BORDER_32 @ H32 border mode? + tstne r7, #PDRAW_32_COLS + addne r4, r4, #32 mov r7, #328 - mla r1, r7, r1, r4 - sub r12, r1, #(328*8*START_ROW) @ r12=scrpos + mla r2, r7, r2, r4 + sub r12, r2, #(328*8*START_ROW) @ r12=scrpos - ldr r10, =(Pico+0x10000) @ r10=Pico.vram + ldr r10, [r1, #OFS_EST_PicoMem_vram] mov r8, r0 @ hc mov r0, #0xf @@ -597,13 +627,14 @@ DrawTilesFromCacheF: .dtfcf_loop: ldr r7, [r8], #4 @ read code movs r1, r7, lsr #16 @ r1=dx; - ldmeqfd sp!, {r4-r10,pc} @ dx is never zero, this must be a terminator, return + ldmeqfd sp!, {r4-r11,pc} @ dx is never zero, this must be a terminator, return @ row changed? 
cmp r6, r7, lsr #27 movne r6, r7, lsr #27 + subne r6, r6, r11 movne r4, #328*8 - mlane r5, r4, r6, r12 @ r5=pd = scrpos + prevy*328*8 + mlane r5, r4, r6, r12 @ r5=pd = scrpos + (prevy-Draw2Start)*328*8 bic r1, r1, #0xf800 add r1, r5, r1 @ r1=pdest (halfwords) @@ -666,31 +697,41 @@ DrawTilesFromCacheF: @ @@@@@@@@@@@@@@@ @ (tile_start<<16)|row_start -.global DrawWindowFull @ int tstart, int tend, int prio +@ void DrawWindowFull(int start, int end, int prio, struct PicoEState *est) + +.global DrawWindowFull DrawWindowFull: stmfd sp!, {r4-r11,lr} - ldr r11, =(Pico+0x22228) @ Pico.video - ldrb r12, [r11, #3] @ pvid->reg[3] + ldr r11, [r3, #OFS_EST_Pico] + ldrb r12, [r11, #OFS_Pico_video_reg+3] @ pvid->reg[3] mov r12, r12, lsl #10 - ldr r4, [r11, #12] + ldr r4, [r11, #OFS_Pico_video_reg+12] mov r5, #1 @ nametab_step + ldr r11, [r3, #OFS_EST_Draw2FB] + ldr r6, [r3, #OFS_EST_Draw2Start] tst r4, #1 @ 40 cell mode? andne r12, r12, #0xf000 @ 0x3c<<10 - andeq r12, r12, #0xf800 movne r5, r5, lsl #7 - moveq r5, r5, lsl #6 @ nametab_step + bne 0f + ldr r7, [r3, #OFS_EST_rendstatus] + and r12, r12, #0xf800 + mov r5, r5, lsl #6 @ nametab_step + tst r7, #PDRAW_BORDER_32 + tstne r7, #PDRAW_32_COLS + addne r11, r11, #32 @ center screen in H32 mode - and r4, r0, #0xff - mla r12, r5, r4, r12 @ nametab += nametab_step*start; +0: and r4, r0, #0xff + sub r4, r4, r6 + mla r12, r5, r4, r12 @ nametab += nametab_step*(start-Draw2Start]; + ldr r10, [r3, #OFS_EST_PicoMem_vram] mov r4, r0, lsr #16 @ r4=start_cell_h add r7, r12, r4, lsl #1 @ fetch the first code now - ldr r10, =(Pico+0x10000) @ lr=Pico.vram ldrh r7, [r10, r7] cmp r2, r7, lsr #15 ldmnefd sp!, {r4-r11,pc} @ hack: simply assume that whole window uses same priority @@ -704,15 +745,13 @@ DrawWindowFull: mov r9, #0xff000000 @ r9=prevcode=-1 - ldr r11, =PicoDraw2FB @ r11=scrpos and r4, r0, #0xff - ldr r11, [r11] - sub r4, r4, #START_ROW add r11, r11, #328*8 + sub r4, r4, r6 add r11, r11, #8 mov r7, #328*8 - mla r11, r7, r4, r11 @ 
scrpos+=8*328*(start-START_ROW); + mla r11, r7, r4, r11 @ scrpos+=8*328*(start-Draw2Start); mov r0, #0xf .dwfloop_outer: @@ -758,7 +797,8 @@ DrawWindowFull: tst r9, #0x080000 @ hflip? bne .dwf_hflip - @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf + @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=PicoMem.vram) + @ r2,r4,r7: scratch, r0=0xf Tile 0, 0 b .dwfloop @@ -868,13 +908,15 @@ DrawWindowFull: cmp r6, #(END_ROW*8+8) bge 52b - @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=Pico.vram) r2,r4,r7: scratch, r0=0xf + @ Tile (r1=pdest, r3=pal, r9=prevcode, r10=PicoMem.vram) + @ r2,r4,r7: scratch, r0=0xf Tile \hflip, \vflip b 52b .endm +@ void DrawSpriteFull(unsigned int *sprite, struct PicoEState *est) -.global DrawSpriteFull @ unsigned int *sprite +.global DrawSpriteFull DrawSpriteFull: stmfd sp!, {r4-r11,lr} @@ -902,12 +944,16 @@ DrawSpriteFull: and r3, lr, #0x6000 mov r3, r3, lsr #9 @ r3=pal=((code>>9)&0x30); - ldr r11, =PicoDraw2FB @ r11=scrpos - ldr r10, =(Pico+0x10000) @ r10=Pico.vram - ldr r11, [r11] - sub r1, r12, #(START_ROW*8) + ldr r0, [r1, #OFS_EST_rendstatus] + ldr r11, [r1, #OFS_EST_Draw2FB] + ldr r2, [r1, #OFS_EST_Draw2Start] + ldr r10, [r1, #OFS_EST_PicoMem_vram] + tst r0, #PDRAW_BORDER_32 @ H32 border mode? + tstne r0, #PDRAW_32_COLS + addne r11, r11, #32 + sub r12, r12, r2, lsl #3 mov r0, #328 - mla r11, r1, r0, r11 @ scrpos+=(sy-START_ROW*8)*328; + mla r11, r12, r0, r11 @ scrpos+=(sy-Draw2Start*8)*328; orr r5, r5, r5, lsl #16 @ orr r5, r6, r5, lsl #8 @ r5=width|(height<<8)|(height<<24) diff --git a/pico/draw_arm.S b/pico/draw_arm.S index 0bf297b9..40d5c443 100644 --- a/pico/draw_arm.S +++ b/pico/draw_arm.S @@ -1,34 +1,39 @@ /* * assembly optimized versions of most funtions from draw.c - * (C) notaz, 2006-2010 + * (C) notaz, 2006-2010,2017 + * (C) irixxxx, 2020-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
* * this is highly specialized, be careful if changing related C code! + * + * NB only does RGB565 output, BGR isn't supported */ -.extern Pico -.extern PicoOpt -.extern HighCol -.extern DrawScanline -.extern HighSprZ -.extern rendstatus -.extern HighPreSpr -.extern DrawLineDest +#include "pico_int_offs.h" + .extern DrawStripInterlace -.extern HighCacheS_ptr -.equiv OVERRIDE_HIGHCOL, 1 - -.equ PDRAW_SPRITES_MOVED, (1<<0) .equ PDRAW_WND_DIFF_PRIO, (1<<1) -.equ PDRAW_ACC_SPRITES, (1<<2) -.equ PDRAW_DIRTY_SPRITES, (1<<4) .equ PDRAW_PLANE_HI_PRIO, (1<<6) .equ PDRAW_SHHI_DONE, (1<<7) +.equ PDRAW_BORDER_32, (1<<9) +.equ PDRAW_32X_SCALE, (1<<12) +.equ PDRAW_BGC_DMA, (1<<14) +.equ PDRAW_SOFTSCALE, (1<<15) + +@ helpers +.macro add_c24 d s c + add \d, \s, #(\c & 0x00ff00) +.if \c & 0x0000ff + add \d, \d, #(\c & 0x0000ff) +.endif +.if \c & 0xff0000 + add \d, \d, #(\c & 0xff0000) +.endif +.endm -@ helper .macro TilePixel pat lsrr offs .if !\lsrr ands r4, \pat, r2 @@ -65,6 +70,45 @@ @ shadow/hilight mode +@ +.macro TilePixelNonSH pat lsrr offs +.if !\lsrr + ands r4, \pat, r2 +.else + ands r4, \pat, r2, lsr #\lsrr +.endif + beq 0f + cmp r4, #0xe + orr r4, r3, r4 + biceq r4, r4, #0x80 + strb r4, [r1,#\offs] +0: +.endm + +@ TileNormNonSH (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf +.macro TileNormNonSH pat + TilePixelNonSH \pat, 12, 0 @ #0x0000f000 + TilePixelNonSH \pat, 8, 1 @ #0x00000f00 + TilePixelNonSH \pat, 4, 2 @ #0x000000f0 + TilePixelNonSH \pat, 0, 3 @ #0x0000000f + TilePixelNonSH \pat, 28, 4 @ #0xf0000000 + TilePixelNonSH \pat, 24, 5 @ #0x0f000000 + TilePixelNonSH \pat, 20, 6 @ #0x00f00000 + TilePixelNonSH \pat, 16, 7 @ #0x000f0000 +.endm + +@ TileFlipNonSH (r1=pdest, r2=pixels8, r3=pal) r4: scratch, pat: register with helper pattern 0xf +.macro TileFlipNonSH pat + TilePixelNonSH \pat, 16, 0 @ #0x000f0000 + TilePixelNonSH \pat, 20, 1 @ #0x00f00000 + TilePixelNonSH \pat, 24, 2 @ #0x0f000000 + TilePixelNonSH \pat, 28, 3 @ #0xf0000000 + 
TilePixelNonSH \pat, 0, 4 @ #0x0000000f + TilePixelNonSH \pat, 4, 5 @ #0x000000f0 + TilePixelNonSH \pat, 8, 6 @ #0x00000f00 + TilePixelNonSH \pat, 12, 7 @ #0x0000f000 +.endm + @ this one is for hi priority layer .macro TilePixelShHP lsrr offs .if !\lsrr @@ -74,7 +118,7 @@ .endif ldreqb r4, [r1,#\offs] orrne r4, r3, r4 - andeq r4, r4, #0xbf + andeq r4, r4, #0x7f strb r4, [r1,#\offs] .endm @@ -106,52 +150,48 @@ @ TileSingleSh (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx; r12: helper pattern 0xf .macro TileSingleSh tst r0, #1 @ not aligned? - mov r7, #0x00c000 - orr r7, r7, #0xc0 - ldrneb r4, [r1] - ldreqh r4, [r1] - orr r4, r4, r7 - strneb r4, [r1], #1 - streqh r4, [r1], #2 - ldrh r4, [r1] - orr r4, r4, r7 - strh r4, [r1], #2 - ldrh r4, [r1] - orr r4, r4, r7 - strh r4, [r1], #2 - ldrh r4, [r1] - orr r4, r4, r7 - strh r4, [r1], #2 - ldrneb r4, [r1] - orr r4, r4, r7 - strneb r4, [r1], #1 + mov r7, #0x008000 + orr r7, r7, #0x80 + ldrneb r4, [r1], #1 + ldreqh r4, [r1], #2 @ 1ci + ldrh r12, [r1], #2 + orr r4, r4, r7 + strneb r4, [r1, #-3] + streqh r4, [r1, #-4] + ldrh r4, [r1], #2 + orr r12, r12, r7 + strh r12, [r1, #-4] + ldrh r12, [r1], #2 + orr r4, r4, r7 + strh r4, [r1, #-4] + ldrneb r4, [r1] + orr r12, r12, r7 + strh r12, [r1, #-2] + orrne r4, r4, r7 + strneb r4, [r1], #1 + mov r12, #0xf .endm @ TileSingleHi (r1=pdest, r2=pixels8, r3=pal) r4,r7: scratch, r0=sx, r12: register with helper pattern 0xf .macro TileSingleHi tst r1, #1 @ not aligned? 
- mov r7, #0x008000 - orr r7, r7, #0x80 + mov r7, #0x004000 + orr r7, r7, #0x40 ldrneb r4, [r1], #1 ldreqh r4, [r1], #2 @ 1ci ldrh r12, [r1], #2 - bic r4, r4, r7, lsr #1 orr r4, r4, r7 strneb r4, [r1, #-3] streqh r4, [r1, #-4] ldrh r4, [r1], #2 - bic r12, r12, r7, lsr #1 orr r12, r12, r7 strh r12, [r1, #-4] ldrh r12, [r1], #2 - bic r4, r4, r7, lsr #1 orr r4, r4, r7 strh r4, [r1, #-4] ldrneb r4, [r1] - bic r12, r12, r7, lsr #1 orr r12, r12, r7 strh r12, [r1, #-2] - bicne r4, r4, r7, lsr #1 orrne r4, r4, r7 strneb r4, [r1], #1 mov r12, #0xf @@ -168,7 +208,7 @@ ldrgeb r7, [r1,#\ofs] orrlt r7, r3, r4 @ normal - bicge r7, r7, #0xc0 + subge r4, r4, #1 orrge r7, r7, r4, lsl #6 strb r7, [r1,#\ofs] 0: @@ -208,7 +248,7 @@ cmp r4, #0xe ldrgeb r4, [r1,#\ofs] orrlt r4, r3, r4 - orrge r4, r4, #0x80 + orrge r4, r4, #0x40 strb r4, [r1,#\ofs] 0: .endm @@ -245,8 +285,9 @@ cmp r7, #0xe blt 0f - tst r4, #0xc0 - bicne r4, r4, #0xc0 + tst r4, #0x40 + bicne r4, r4, #0x40 + subne r7, r7, #1 orrne r4, r4, r7, lsl #6 strneb r4, [r1,#\ofs] 0: @@ -287,21 +328,23 @@ @ int cells; // 0x14 @ }; -@ void DrawLayer(int plane_sh, int *hcache, int cellskip, int maxcells); +@ void DrawLayer(int lflags, int *hcache, int cellskip, int maxcells, +@ struct PicoEState *est) .global DrawLayer DrawLayer: + ldr r12, [sp] @ est stmfd sp!, {r4-r11,lr} - ldr r11, =(Pico+0x22228) @ Pico.video + ldr r11, [r12, #OFS_EST_Pico] mov r8, #1 - ldrb r7, [r11, #16] @ ??vv??hh + ldrb r7, [r11, #OFS_Pico_video_reg+16] @ ??vv??hh mov r6, r1 @ hcache - orr r9, r3, r0, lsl #30 - orr r9, r9, r2, lsl #8 @ r9=sh[31]|cellskip[15:8]|maxcells[7:0] (tmp) + orr r9, r3, r0, lsl #29 @ r9=force[31]|sh[30]|plane[29] + orr r9, r9, r2, lsl #8 @ |cellskip[15:8]|maxcells[7:0] (tmp) mov r1, r7, lsl #4 orr r1, r1, #0x00ff @@ -310,39 +353,45 @@ DrawLayer: cmp r10, #1 biclt r1, r1, #0xfc00 biceq r1, r1, #0xfe00 - bicgt r1, r1, #0xff00 @ r1=ymask=(height<<8)|0xff; ...; // Y Mask in pixels + cmp r10, #2 + moveq r1, #0x0007 + movgt r1, #0x00ff @ 
r1=ymask=(height<<8)|0xff; ...; // Y Mask in pixels - add r10, r10, #5 - cmp r10, #7 - subge r10, r10, #1 @ r10=shift[width] (5,6,6,7) + cmp r10, #2 + addlt r10, r10, #5 + moveq r10, #5 + movgt r10, #7 @ r10=shift[width] (5,6,5,7) + + ldr r2, [r12, #OFS_EST_DrawScanline] + ldr lr, [r12, #OFS_EST_PicoMem_vram] + + @ Find name table: + ands r0, r0, #1 + ldreqb r12, [r11, #OFS_Pico_video_reg+2] + ldrneb r12, [r11, #OFS_Pico_video_reg+4] @ calculate xmask: mov r5, r8, lsl r10 sub r5, r5, #1 @ r5=xmask - @ Find name table: - ands r0, r0, #1 - ldreqb r12, [r11, #2] - ldrneb r12, [r11, #4] - - ldr r2, =DrawScanline @ trying to make good use of pipeline here - ldr lr, =(Pico+0x10000) @ lr=Pico.vram - moveq r12, r12, lsl #10 movne r12, r12, lsl #13 and r12, r12, #(7<<13) @ r12=(ts->nametab<<1) (halfword compliant) - ldrh r8, [r11, #12] - ldrb r7, [r11, #11] - ldr r2, [r2] + ldrh r8, [r11, #OFS_Pico_video_reg+12] + ldrb r7, [r11, #OFS_Pico_video_reg+11] mov r4, r8, lsr #8 @ pvid->reg[13] mov r4, r4, lsl #10 @ htab=pvid->reg[13]<<9; (halfwords) - tst r7, #2 - addne r4, r4, r2, lsl #2 @ htab+=DrawScanline<<1; // Offset by line - tst r7, #1 - biceq r4, r4, #0x1f @ htab&=~0xf; // Offset by tile - add r4, r4, r0, lsl #1 @ htab+=plane + + ands r3, r7, #0x03 + beq 0f + cmp r3, #2 + mov r3, r2, lsl #2 @ htab+=DrawScanline<<1; // Offset by line + biceq r3, r3, #0x1f @ htab&=~0xf; // Offset by tile + andlt r3, r3, #0x1f + add r4, r4, r3 +0: add r4, r4, r0, lsl #1 @ htab+=plane bic r4, r4, #0x00ff0000 @ just in case ldrh r3, [lr, r4] @ r3=hscroll @@ -350,8 +399,7 @@ DrawLayer: bne .DrawStrip_vsscroll @ Get vertical scroll value: - add r7, lr, #0x012000 - add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180) + add_c24 r7, lr, (OFS_PMEM_vsram-OFS_PMEM_vram) ldr r7, [r7] tst r8, #2 @@ -359,7 +407,8 @@ DrawLayer: bne .DrawStrip_interlace tst r0, r0 - movne r7, r7, lsr #16 + moveq r7, r7, lsl #16 + mov r7, r7, lsr #16 @ Find the line in the name table add r2, r2, r7 @@ -381,13 +430,14 @@ 
DrawLayer: and r1, r1, #7 add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1 - tst r9, #1<<31 + movs r3, r9, lsl #1 @ (force[31]|sh[30]) << 1 mov r3, #0 - orrne r10,r10, #1<<23 @ r10=(cells<<24|sh<<23|hi_not_empty<<22|had_output<<21|ty) - movne r3, #0x40 @ default to shadowed pal on sh mode + orrmi r10,r10, #1<<23 @ r10=cells[31:24]|sh[23]|hi_not_empty[22] +@ orrcc r10,r10, #1<<20 @ |had_output[21]|!force[20]|hscroll[18:16]|ty[15:0] + movmi r3, #0x80 @ default to shadowed pal on sh mode - cmp r7, #8 - addne r10,r10, #0x01000000 @ we will loop cells+1 times if there is scroll + and r4, r7, #7 + orr r10,r10, r4, lsl #16 @ we will process cells+1 if there is scroll and r9, r9, #0xff00 add r8, r8, r9, lsr #8 @ tilex+=cellskip @@ -395,27 +445,51 @@ DrawLayer: sub r10,r10,r9, lsl #16 @ cells-=cellskip @ cache some stuff to avoid mem access -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldr r11,[sp, #9*4] @ est mov r0, #0xf - ldr r11,[r11] -.else - ldr r11,=HighCol - mov r0, #0xf -.endif + ldr r11,[r11, #OFS_EST_HighCol] mvn r9, #0 @ r9=prevcode=-1 - add r1, r11, r7 @ r1=pdest - + add r1, r11, r7 @ r1=pdest + @ r10=cells[31:24]|sh[23]|hi_not_empty[22]|had_output[21]|!force[20]|hscroll[18:16]|ty[15:0] + @ r1=pd+dx r2=pack r3=pal r5=xmask r6=hc r8=tilex r9=prevcode r11=HighCol r12=nametab lr=vram @ r4 & r7 are scratch in this loop + + ands r4, r10, #7<<16 @ hscroll? 
+ beq .dsloop_subr1 + subs r10,r10, #0x01000000 + bmi .dsloop_exit + + and r7, r5, r8 @ do first cut tile + add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords) + ldrh r9, [r7, r12] @ r7=code (int, but from unsigned, no sign extend) + + add r8, r8, #1 + + tst r9, #0x1000 @ if (code&0x1000) + mov r2, r9, lsl #21 + add r2, r2, r10, lsl #17 + eorne r2, r2, #0xe<<17 @ if (code&0x1000) addr^=0xe; + + ldr r2, [lr, r2, lsr #16] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + + mvn r7, #0 + mov r4, r4, lsr #16-2 @ (dx&7)*4 + tst r9, #0x0800 + moveq r7, r7, lsl r4 @ mask = ~0 [shift] (dx&7)*4 + movne r7, r7, lsr r4 + bic r2, r2, r7, ror #16 @ pack&~mask + + orr r9, r9, #0x80000000 @ invalidate oldcode since pack is masked + b .DrawStrip_samecode + .dsloop_subr1: sub r1, r1, #8 .dsloop: @ 40-41 times subs r10,r10, #0x01000000 bmi .dsloop_exit -.dsloop_enter: and r7, r5, r8 add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords) ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend) @@ -423,31 +497,30 @@ DrawLayer: add r1, r1, #8 add r8, r8, #1 - tst r7, #0x8000 - bne .DrawStrip_hiprio - cmp r7, r9 beq .DrawStrip_samecode @ we know stuff about this tile already mov r9, r7 @ remember code - orr r10, r10, #1<<21 @ seen non hi-prio tile - movs r2, r9, lsl #20 @ if (code&0x1000) - mov r2, r2, lsl #1 + tst r9, #0x1000 @ if (code&0x1000) + mov r2, r9, lsl #21 add r2, r2, r10, lsl #17 - mov r2, r2, lsr #17 - eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe; - - ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - - bic r7, r3, #0x3f - and r3, r9, #0x6000 - add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9); + eorne r2, r2, #0x0e<<17 @ if (code&0x1000) addr^=0xe; + ldr r2, [lr, r2, lsr #16] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels .DrawStrip_samecode: + tst r9, #0x8000 +@ tstne r10, #1<<20 @ !force[20] + bne .DrawStrip_hiprio + + orr r10, r10, #1<<21 @ seen non hi-prio tile tst r2, 
r2 beq .dsloop @ tileline blank + bic r7, r3, #0x7f + and r3, r9, #0x6000 + add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9); + cmp r2, r2, ror #4 beq .DrawStrip_SingleColor @ tileline singlecolor @@ -476,71 +549,102 @@ DrawLayer: strneb r4, [r1], #1 @ have a remaining unaligned pixel? b .dsloop_subr1 -.DrawStrip_hiprio_maybempt: - cmp r7, r9 - beq .dsloop @ must've been empty, otherwise we wouldn't get here - movs r2, r7, lsl #20 @ if (code&0x1000) - mov r2, r2, lsl #1 - add r2, r2, r10, lsl #17 - mov r2, r2, lsr #17 - eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe; - ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - mov r9, r7 @ remember code - tst r2, r2 - beq .dsloop - orr r10, r10, #1<<22 - .DrawStrip_hiprio: - tst r10, #0x00c00000 - beq .DrawStrip_hiprio_maybempt - sub r0, r1, r11 - orr r7, r7, r0, lsl #16 + tst r10, #(1<<23) @ sh[23] + tsteq r2, r2 @ if (!sh[23] && code==blank) continue + beq .dsloop + +@ orr r10, r10, #1<<22 @ hi_not_empty[22] + sub r7, r1, r11 + orr r7, r9, r7, lsl #16 orr r7, r7, r10, lsl #25 @ (ty<<25) - tst r7, #0x1000 - eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26; - str r7, [r6], #4 @ cache hi priority tile - mov r0, #0xf + tst r9, #0x1000 + eorne r7, r7, #0xe<<25 @ if(code&0x1000) cval^=0xe<<25; + str r7, [r6], #4 @ cache hi priority tile code + str r2, [r6], #4 @ cache hi priority tile data b .dsloop .dsloop_exit: + ands r4,r10, #7<<16 @ hscroll? 
+ beq .DrawStrip_noscroll + + and r7, r5, r8 @ do one more cut tile + add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords) + ldrh r9, [r7, r12] @ r7=code (int, but from unsigned, no sign extend) + + add r1, r1, #8 + + tst r9, #0x1000 @ if (code&0x1000) + mov r2, r9, lsl #21 + add r2, r2, r10, lsl #17 + eorne r2, r2, #0x0e<<17 @ if (code&0x1000) addr^=0xe; + + ldr r2, [lr, r2, lsr #16] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + + mvn r7, #0 + mov r4, r4, lsr #16-2 @ (dx&7)*4 + tst r9, #0x0800 + moveq r7, r7, lsl r4 @ mask = ~0 [shift] (dx&7)*4 + movne r7, r7, lsr r4 + and r2, r2, r7, ror #16 @ pack&mask + + bic r10,r10, #7<<16 + b .DrawStrip_samecode @ one last time, with last tile now masked + +.DrawStrip_noscroll: tst r10, #1<<21 @ seen non hi-prio tile - ldreq r1, =rendstatus + ldr r1, [sp, #9*4] @ est mov r0, #0 - ldreq r2, [r1] + ldreq r2, [r1, #OFS_EST_rendstatus] str r0, [r6] @ terminate the cache list orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles - streq r2, [r1] + streq r2, [r1, #OFS_EST_rendstatus] - ldmfd sp!, {r4-r11,lr} - bx lr + ldmfd sp!, {r4-r11,pc} @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ .DrawStrip_vsscroll: + tst r8, #1 @ if h40: lflags |= 0x10000 + orrne r0, r0, #0x10000 + rsb r8, r3, #0 mov r8, r8, lsr #3 @ r8=tilex=(-ts->hscroll)>>3 bic r8, r8, #0x3fc00000 - orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|tilex[21:0]) + orr r8, r8, r5, lsl #25 @ r8=(xmask[31:25]|had_output[24]|!force[23]|tilex[21:0]) - ldr r4, =DrawScanline + ldr r11, [sp, #9*4] @ est orr r5, r1, r10, lsl #24 - ldr r4, [r4] + ldr r4, [r11, #OFS_EST_DrawScanline] sub r1, r3, #1 orr r5, r5, r4, lsl #16 @ r5=(shift_width[31:24]|scanline[23:16]|ymask[15:0]) and r1, r1, #7 add r7, r1, #1 @ r7=dx=((ts->hscroll-1)&7)+1 mov r10,r9, lsl #16 + orr r10,r10, #0xff000000 @ will be adjusted on entering loop tst r0, #1 orrne r10,r10, #0x8000 - tst r9, #1<<31 - mov r3, #0 - orr r10,r10, #0xff000000 @ will be adjusted on 
entering loop - orrne r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0]) - movne r3, #0x40 @ default to shadowed pal on sh mode + tst r3, #0x0f @ hscroll & 0x0f? + beq 0f + eor r3, r3, r7 + sub r10,r10, #1<<24 @ cell-- // start from negative for hscroll + tst r3, #0x08 + subne r10,r10, #1<<16 @ cells-- + subne r10,r10, #1<<24 @ cell-- // even more negative - cmp r7, #8 - subne r10,r10, #0x01000000 @ have hscroll, start with negative cell + add_c24 r1, lr, (OFS_PMEM_vsram-OFS_PMEM_vram) + tst r0, #0x10000 @ h40? + ldrne r3, [r1, #0x00] @ r3=vsram[0x00..0x01] + ldreq r3, [r1, #0x40] @ r3=vsram[0x20..0x21] + str r3, [r1, #0x7c] @ vsram[0x3e..0x3f]=r3 +0: + tst r9, #1<<30 + mov r3, #0 + orrne r10,r10, #1<<23 @ r10=(cell[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0]) + movne r3, #0x80 @ default to shadowed pal on sh mode +@ tst r9, #1<<31 +@ orreq r8, r8, #1<<23 and r9, r9, #0xff00 add r8, r8, r9, lsr #8 @ tilex+=cellskip @@ -548,32 +652,28 @@ DrawLayer: add r10,r10,r9, lsl #16 @ cell+=cellskip @ cache some stuff to avoid mem access -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldr r11,[sp, #9*4] @ est mov r0, #0xf - ldr r11,[r11] -.else - ldr r11,=HighCol - mov r0, #0xf -.endif + ldr r11,[r11, #OFS_EST_HighCol] mvn r9, #0 @ r9=prevcode=-1 add r1, r11, r7 @ r1=pdest + @ r10=cells[31:24]|sh[23]|hi_not_empty[22]|cells_max[21:16]|plane[15]|ty[14:0] + @ r8=xmask[31:25]|had_output[24]|!force[23]|tilex[21:0] + @ r5=shift_width[31:24]|scanline[23:16]|ymask[15:0] + @ r3=nametabadd[31:16]|must_be_0[15:8]|pal[7:0] + @ r1=pd+dx r2=pack r6=hc r9=prevcode r11=HighCol r12=nametab lr=vram @ r4 & r7 are scratch in this loop -.dsloop_vs_subr1: - sub r1, r1, #8 -.dsloop_vs: @ 40-41 times - add r10,r10, #0x01000000 - and r4, r10, #0x003f0000 - cmp r4, r10, asr #8 - ble .dsloop_vs_exit + + @ need to calc new ty? + movs r7, r10, lsl #7 @ (cell&1)? 
+ bmi .dsloop_vs_subr1 @ calc offset and read tileline code to r7, also calc ty - add r7, lr, #0x012000 - add r7, r7, #0x000180 @ r7=Pico.vsram (Pico+0x22180) - add r7, r7, r10,asr #23 @ vsram + ((cell&~1)<<1) - bic r7, r7, #3 + add_c24 r7, lr, (OFS_PMEM_vsram-OFS_PMEM_vram) + and r4, r10, #0x3e000000 + add r7, r7, r4, asr #23 @ vsram + ((cell&0x3e)<<1) tst r10,#0x8000 @ plane1? addne r7, r7, #2 ldrh r7, [r7] @ r7=vscroll @@ -588,40 +688,76 @@ DrawLayer: mov r4, r4, lsr #19 mov r7, r5, lsr #24 mov r4, r4, lsl r7 @ nametabadd + and r3, r3, #0xff + orr r3, r3, r4, lsl #16 @ r3=(nametabadd[31:16],pal[15:0]) +.dsloop_vs_subr1: + sub r1, r1, #8 +.dsloop_vs: @ 40-41 times + add r10,r10, #0x01000000 + and r4, r10, #0x003f0000 + cmp r4, r10, asr #8 + ble .dsloop_vs_exit + + @ need to calc new ty? + movs r7, r10, lsl #7 @ (cell&1)? + bmi 0f + + @ calc offset and read tileline code to r7, also calc ty + add_c24 r7, lr, (OFS_PMEM_vsram-OFS_PMEM_vram) + and r4, r10, #0x3e000000 + add r7, r7, r4, asr #23 @ vsram + ((cell&0x3e)<<1) + tst r10,#0x8000 @ plane1? + addne r7, r7, #2 + ldrh r7, [r7] @ r7=vscroll + + bic r10,r10,#0xff @ clear old ty + and r4, r5, #0xff0000 @ scanline + add r4, r4, r7, lsl #16 @ ... += vscroll + and r4, r4, r5, lsl #16 @ ... 
&= ymask + and r7, r4, #0x70000 + orr r10,r10,r7, lsr #15 @ new ty + + mov r4, r4, lsr #19 + mov r7, r5, lsr #24 + mov r4, r4, lsl r7 @ nametabadd + and r3, r3, #0xff + orr r3, r3, r4, lsl #16 @ r3=(nametabadd[31:16],pal[15:0]) +0: and r7, r8, r8, lsr #25 - add r7, lr, r7, lsl #1 @ Pico.vram+((tilex&ts->xmask) as halfwords) - add r7, r7, r4, lsl #1 + add r7, lr, r7, lsl #1 @ PicoMem.vram+((tilex&ts->xmask) as halfwords) + add r7, r7, r3, lsr #15 ldrh r7, [r7, r12] @ r7=code (int, but from unsigned, no sign extend) add r1, r1, #8 add r8, r8, #1 - tst r7, #0x8000 - bne .DrawStrip_vs_hiprio - + orr r7, r7, r10, lsl #24 @ code | (ty << 24) cmp r7, r9 beq .DrawStrip_vs_samecode @ we know stuff about this tile already mov r9, r7 @ remember code - orr r8, r8, #(1<<24)@ seen non hi-prio tile - movs r2, r9, lsl #20 @ if (code&0x1000) - mov r2, r2, lsl #1 + tst r9, #0x1000 @ if (code&0x1000) + mov r2, r9, lsl #21 add r2, r2, r10, lsl #17 - mov r2, r2, lsr #17 - eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe; + eorne r2, r2, #0x0e<<17 @ if (code&0x1000) addr^=0xe; - ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - - bic r7, r3, #0x3f - and r3, r9, #0x6000 - add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9); + ldr r2, [lr, r2, lsr #16] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels .DrawStrip_vs_samecode: + tst r9, #0x8000 +@ tstne r8, #1<<23 @ !force[23] + bne .DrawStrip_vs_hiprio + + orr r8, r8, #(1<<24)@ seen non hi-prio tile tst r2, r2 beq .dsloop_vs @ tileline blank + bic r7, r3, #0x7f + and r3, r9, #0x6000 + add r3, r7, r3, lsr #9 @ r3=pal=((code&0x6000)>>9); + cmp r2, r2, ror #4 beq .DrawStrip_vs_SingleColor @ tileline singlecolor @@ -651,43 +787,30 @@ DrawLayer: b .dsloop_vs_subr1 .DrawStrip_vs_hiprio: - tst r10, #0x00c00000 - beq .DrawStrip_vs_hiprio_maybempt - sub r0, r1, r11 - orr r7, r7, r0, lsl #16 - orr r7, r7, r10, lsl #25 @ (ty<<25) - tst r7, #0x1000 - eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26; - str 
r7, [r6], #4 @ cache hi priority tile - mov r0, #0xf - b .dsloop_vs + tst r10, #(1<<23) @ sh[23] + tsteq r2, r2 @ if (!sh[23] && code==blank) continue + beq .dsloop_vs -.DrawStrip_vs_hiprio_maybempt: - cmp r7, r9 - beq .dsloop_vs @ must've been empty, otherwise we wouldn't get here - movs r2, r7, lsl #20 @ if (code&0x1000) - mov r2, r2, lsl #1 - add r2, r2, r10, lsl #17 - mov r2, r2, lsr #17 - eorcs r2, r2, #0x0e @ if (code&0x1000) addr^=0xe; - ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - mov r9, r7 @ remember code - tst r2, r2 - orrne r10, r10, #1<<22 - bne .DrawStrip_vs_hiprio +@ orr r10, r10, #1<<22 @ hi_not_empty[22] + sub r7, r1, r11 + orr r7, r9, r7, lsl #16 + orr r7, r7, r10, lsl #25 @ (ty<<25) + tst r9, #0x1000 + eorne r7, r7, #7<<26 @ if(code&0x1000) cval^=7<<26; + str r7, [r6], #4 @ cache hi priority tile code + str r2, [r6], #4 @ cache hi priority tile data b .dsloop_vs .dsloop_vs_exit: tst r8, #(1<<24) @ seen non hi-prio tile - ldreq r1, =rendstatus + ldr r1, [sp, #9*4] @ est mov r0, #0 - ldreq r2, [r1] + ldreq r2, [r1, #OFS_EST_rendstatus] str r0, [r6] @ terminate the cache list orreq r2, r2, #PDRAW_PLANE_HI_PRIO @ had a layer with all hi-prio tiles - streq r2, [r1] + streq r2, [r1, #OFS_EST_rendstatus] - ldmfd sp!, {r4-r11,lr} - bx lr + ldmfd sp!, {r4-r11,pc} @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -695,8 +818,8 @@ DrawLayer: @ interlace mode 2? Sonic 2? 
.DrawStrip_interlace: tst r0, r0 - moveq r7, r7, lsl #21 - movne r7, r7, lsl #5 + movne r7, r7, lsr #16 + mov r7, r7, lsl #21 @ Find the line in the name table add r2, r7, r2, lsl #22 @ r2=(vscroll+(DrawScanline<<1))<<21 (11 bits); @@ -712,35 +835,25 @@ DrawLayer: stmia sp, {r0,r2,r3,r5,r6,r9} mov r0, sp - bl DrawStripInterlace @ struct TileStrip *ts + mov r1, r9, lsr #29 + bl DrawStripInterlace @ struct TileStrip *ts, int plane_sh add sp, sp, #6*4 - ldmfd sp!, {r4-r11,lr} - bx lr + ldmfd sp!, {r4-r11,pc} .pool @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@ void BackFill(int reg7, int sh, struct PicoEState *est) -.global BackFill @ int reg7, int sh +.global BackFill BackFill: stmfd sp!, {r4-r9,lr} -.if OVERRIDE_HIGHCOL - ldr lr, =HighCol - mov r0, r0, lsl #26 - ldr lr, [lr] - mov r0, r0, lsr #26 - add lr, lr, #8 -.else - ldr lr, =(HighCol+8) - mov r0, r0, lsl #26 - mov r0, r0, lsr #26 -.endif - - orr r0, r0, r1, lsl #6 + ldr lr, [r2, #OFS_EST_HighCol] + orr r0, r0, r1, lsl #7 orr r0, r0, r0, lsl #8 orr r0, r0, r0, lsl #16 @@ -753,6 +866,7 @@ BackFill: mov r7, r0 @ go go go! 
+ add lr, lr, #8 stmia lr!, {r0-r7} @ 10*8*4 stmia lr!, {r0-r7} stmia lr!, {r0-r7} @@ -764,30 +878,24 @@ BackFill: stmia lr!, {r0-r7} stmia lr!, {r0-r7} - ldmfd sp!, {r4-r9,r12} - bx r12 + ldmfd sp!, {r4-r9,pc} @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +@ void DrawTilesFromCache(int *hc, int sh, int rlim, struct PicoEState *est) -.global DrawTilesFromCache @ int *hc, int sh, int rlim +.global DrawTilesFromCache DrawTilesFromCache: - stmfd sp!, {r4-r8,r11,lr} + stmfd sp!, {r4-r9,r11,lr} @ cache some stuff to avoid mem access -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldr r11,[r3, #OFS_EST_HighCol] mov r12,#0xf - ldr r11,[r11] -.else - ldr r11,=HighCol - mov r12,#0xf -.endif - ldr lr, =(Pico+0x10000) @ lr=Pico.vram + ldr lr, [r3, #OFS_EST_PicoMem_vram] + mov r9, r3 @ est - mvn r5, #0 @ r5=prevcode=-1 ands r8, r1, #1 orr r8, r8, r2, lsl #1 bne .dtfc_check_rendflags @@ -796,26 +904,15 @@ DrawTilesFromCache: .dtfc_loop: ldr r6, [r0], #4 @ read code movs r1, r6, lsr #16 @ r1=dx; - ldmeqfd sp!, {r4-r8,r11,pc} @ dx is never zero, this must be a terminator, return + ldmeqfd sp!, {r4-r9,r11,pc} @ dx is never zero, this must be a terminator, return bic r4, r1, #0xfe00 add r1, r11, r4 @ r1=pdest - mov r7, r6, lsl #16 - cmp r5, r7, lsr #16 - beq .dtfc_samecode @ if (code==prevcode) + ldr r2, [r0], #4 @ read pixel data - mov r5, r7, lsr #16 - - mov r2, r5, lsl #21 - mov r2, r2, lsr #17 @ r2=addr=(code&0x7ff)<<4; - add r2, r2, r6, lsr #25 @ addr+=ty - - and r3, r5, #0x6000 + and r3, r6, #0x6000 mov r3, r3, lsr #9 @ r3=pal=((code&0x6000)>>9); - ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - -.dtfc_samecode: rsbs r4, r4, r8, lsr #1 bmi .dtfc_cut_tile @@ -828,7 +925,7 @@ DrawTilesFromCache: cmp r2, r2, ror #4 beq .dtfc_SingleColor @ tileline singlecolor - tst r5, #0x0800 + tst r6, #0x0800 bne .dtfc_TileFlip @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern @@ -860,7 +957,7 @@ 
DrawTilesFromCache: cmp r2, r2, ror #4 beq .dtfc_SingleColor @ tileline singlecolor - tst r5, #0x0800 + tst r6, #0x0800 bne .dtfc_TileFlipShHP @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern @@ -875,8 +972,8 @@ DrawTilesFromCache: .dtfc_shadow_blank: tst r1, #1 ldrneb r4, [r1] - mov r6, #0xbf - and r4, r4, #0xbf + mov r6, #0x7f + and r4, r4, r6 strneb r4, [r1], #1 ldrh r4, [r1] orr r6, r6, r6, lsl #8 @@ -895,39 +992,39 @@ DrawTilesFromCache: b .dtfc_loop .dtfc_cut_tile: - add r4, r4, #7 @ 0-6 + cmn r4, #8 + ble .dtfc_loop @ off limits + + rsb r4, r4, #0 @ 1-7 mov r4, r4, lsl #2 - mov r12,#0xf<<28 - mov r12,r12,asr r4 - mov r2, r2, ror #16 - tst r5, #0x0800 @ flipped? - mvnne r12,r12 - and r2, r2, r12 - mov r2, r2, ror #16 + mvn r12,#0 + tst r6, #0x0800 @ flipped? + moveq r12,r12, lsl r4 + movne r12,r12, lsr r4 + and r2, r2, r12, ror #16 mov r12,#0xf tst r8, #1 bne .dtfc_shadow tst r2, r2 beq .dtfc_loop - tst r5, #0x0800 + tst r6, #0x0800 beq .dtfc_TileNorm b .dtfc_TileFlip @ check if we have detected layer covered with hi-prio tiles: .dtfc_check_rendflags: - ldr r1, =rendstatus - ldr r2, [r1] + ldr r2, [r9, #OFS_EST_rendstatus] tst r2, #(PDRAW_PLANE_HI_PRIO|PDRAW_SHHI_DONE) beq .dtfc_loop bic r8, r8, #1 @ sh/hi mode off tst r2, #PDRAW_SHHI_DONE bne .dtfc_loop @ already processed orr r2, r2, #PDRAW_SHHI_DONE - str r2, [r1] + str r2, [r9, #OFS_EST_rendstatus] add r1, r11,#8 mov r3, #320/4/4 - mov r6, #0xbf + mov r6, #0x7f orr r6, r6, r6, lsl #8 orr r6, r6, r6, lsl #16 .dtfc_loop_shprep: @@ -940,7 +1037,6 @@ DrawTilesFromCache: stmia r1!,{r2,r4,r5,r7} bne .dtfc_loop_shprep - mvn r5, #0 @ r5=prevcode=-1 b .dtfc_loop .pool @@ -948,37 +1044,39 @@ DrawTilesFromCache: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawSpritesSHi @ unsigned char *sprited +@ void DrawSpritesSHi(unsigned char *sprited, struct PicoEState *est) + +.global DrawSpritesSHi DrawSpritesSHi: - ldr r3, [r0] + ldrb r3, [r0] mov r12,#0xff ands 
r3, r3, #0x7f bxeq lr - stmfd sp!, {r4-r11,lr} - strb r12,[r0,#2] @ set end marker - add r10,r0, #3 @ r10=HighLnSpr end + stmfd sp!, {r1,r3-r11,lr} @ +est + strb r12,[r0,#3] @ set end marker + ldrb r12,[r0,#1] + add r10,r0, #4 @ r10=HighLnSpr end + mvn r12,r12 + tst r12,#0x6 @ masking in slot 1 and tile ovfl? + ldmeqfd sp!, {r1,r3-r11,pc} add r10,r10,r3 @ r10=HighLnSpr end -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldrb r12,[r10,#0] @ width of last sprite + ldr r11,[r1, #OFS_EST_HighCol] + str r12,[sp, #4] mov r12,#0xf - ldr r11,[r11] -.else - ldr r11,=HighCol - mov r12,#0xf -.endif - ldr lr, =(Pico+0x10000) @ lr=Pico.vram + ldr lr, [r1, #OFS_EST_PicoMem_vram] DrawSpriteSHi: @ draw next sprite + ldr r7, [sp] @ est ldrb r0, [r10,#-1]! - ldr r1, =HighPreSpr -@ ldr r8, [sp, #-4] + ldr r1, [r7, #OFS_EST_HighPreSpr] cmp r0, #0xff - ldmeqfd sp!, {r4-r11,pc} @ end of list + ldmeqfd sp!, {r1,r3-r11,pc} @ end of list and r0, r0, #0x7f add r0, r1, r0, lsl #3 @@ -994,18 +1092,18 @@ DrawSpriteSHi: orr r9, r9, #0x90000000 @ r9=scc1 ???? ... (s=shadow/hilight, cc=pal) cmp r12,r9, lsr #28 @ sh/hi with pal3? cmpne r3, #1 @ if not, is it hi prio? 
+ strne r3, [sp, #4] @ reset last sprite width bne DrawSpriteSHi @ non-operator low sprite, already drawn ldr r3, [r0] @ sprite[0] - ldr r7, =DrawScanline mov r6, r3, lsr #28 sub r6, r6, #1 @ r6=width-1 (inc later) mov r5, r3, lsr #24 and r5, r5, #7 @ r5=height + ldr r7, [r7, #OFS_EST_DrawScanline] mov r0, r3, lsl #16 @ r4=sy<<16 (tmp) - ldr r7, [r7] sub r7, r7, r0, asr #16 @ r7=row=DrawScanline-sy tst r9, #0x1000 @@ -1023,10 +1121,16 @@ DrawSpriteSHi: and r7, r7, #7 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address + ldr r0, [sp, #4] + add r6, r6, #1 @ inc now + cmp r0, #0 @ check width of last sprite + movne r6, r0 + movne r0, #0 + strne r0, [sp, #4] + mov r5, r5, lsl #4 @ delta<<=4; // Delta of address mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); - add r6, r6, #1 @ inc now adds r0, r2, #0 @ mov sx to r0 and set ZV flags b .dsprShi_loop_enter @@ -1041,10 +1145,8 @@ DrawSpriteSHi: cmp r0, #328 bge DrawSpriteSHi - mov r8, r8, lsl #17 - mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address - - ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + bic r8, r8, #0xf8000 @ tile&=0x7fff; // Clip tile address + ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels add r1, r11, r0 @ r1=pdest tst r2, r2 beq .dsprShi_loop @@ -1128,86 +1230,75 @@ DrawSpriteSHi: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawAllSprites @ unsigned char *sprited, int prio, int sh +@ void DrawAllSprites(unsigned char *sprited, int prio, int sh, +@ struct PicoEState *est) + +.global DrawAllSprites DrawAllSprites: - ldr r3, =rendstatus orr r1, r2, r1, lsl #1 - ldr r12,[r3] - tst r12,#(PDRAW_DIRTY_SPRITES|PDRAW_SPRITES_MOVED) - beq das_no_prep - stmfd sp!, {r0,r1,lr} - and r0, r12,#PDRAW_DIRTY_SPRITES - bic r12,r12,#(PDRAW_DIRTY_SPRITES|PDRAW_SPRITES_MOVED) - str r12,[r3] - bl PrepareSprites - ldmfd sp!, {r0,r1,lr} - -das_no_prep: - ldr r3, [r0] - ands r3, r3, #0x7f + ldr r2, 
[r0] + ands r2, r2, #0x7f bxeq lr @ time to do some real work - stmfd sp!, {r4-r11,lr} + stmfd sp!, {r1,r3-r11,lr} @ +sh|prio<<1 +est mov r12,#0xff - strb r12,[r0,#2] @ set end marker - add r10,r0, #3 - add r10,r10,r3 @ r10=HighLnSpr end + strb r12,[r0,#3] @ set end marker + ldrb r12,[r0,#1] + add r10,r0 ,#4 + mvn r12,r12 + tst r12,#0x6 @ masking in slot 1 and tile ovfl? + ldmeqfd sp!, {r1,r3-r11,pc} + add r10,r10,r2 @ r10=HighLnSpr end - str r1, [sp, #-4] @ no calls after this point - -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol + ldrb r12,[r10,#0] @ width of last sprite + ldr r11,[r3, #OFS_EST_HighCol] + orr r1 ,r1 ,r12,lsl #24 + str r1, [sp] mov r12,#0xf - ldr r11,[r11] -.else - ldr r11,=HighCol - mov r12,#0xf -.endif - ldr lr, =(Pico+0x10000) @ lr=Pico.vram + ldr lr, [r3, #OFS_EST_PicoMem_vram] @ + 0 : hhhhvvvv ----hhvv yyyyyyyy yyyyyyyy // v, h: horiz. size @ + 4 : xxxxxxxx xxxxxxxx pccvhnnn nnnnnnnn // x: x coord + 8 -DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites +DrawSprite: @ draw next sprite ldrb r0, [r10,#-1]! 
- ldr r1, =HighPreSpr - ldr r8, [sp, #-4] - mov r2, r0, lsr #7 + ldr r4, [sp] @ sh|prio<<1|lastw<<24 + ldr r7, [sp, #4] @ est + mov r2, r0, lsl #24 cmp r0, #0xff - ldmeqfd sp!, {r4-r11,pc} @ end of list - cmp r2, r8, lsr #1 - bne DrawSprite @ wrong priority + ldmeqfd sp!, {r1,r3-r11,pc} @ end of list + eors r2, r2, r4, lsl #30 + bic r2, r4, #0xff000000 + str r2, [sp] + bmi DrawSprite @ wrong priority + ldr r1, [r7, #OFS_EST_HighPreSpr] and r0, r0, #0x7f add r0, r1, r0, lsl #3 -@ stmfd sp!, {r4-r9,r11,lr} -@ orr r8, r2, r1, lsl #4 - ldr r3, [r0] @ sprite[0] - ldr r7, =DrawScanline + ldr r7, [r7, #OFS_EST_DrawScanline] mov r6, r3, lsr #28 sub r6, r6, #1 @ r6=width-1 (inc later) mov r5, r3, lsr #24 and r5, r5, #7 @ r5=height - mov r4, r3, lsl #16 @ r4=sy<<16 (tmp) + mov r8, r3, lsl #16 @ r8=sy<<16 (tmp) - ldr r7, [r7] ldr r9, [r0, #4] - sub r7, r7, r4, asr #16 @ r7=row=DrawScanline-sy + sub r7, r7, r8, asr #16 @ r7=row=DrawScanline-sy mov r2, r9, asr #16 @ r2=sx mov r9, r9, lsl #16 mov r9, r9, lsr #16 - orr r9, r9, r8, lsl #31 @ r9=code|sh[31] + orr r9, r9, r4, lsl #31 @ r9=code|sh[31] tst r9, #0x1000 - movne r4, r5, lsl #3 - subne r4, r4, #1 - subne r7, r4, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y + movne r8, r5, lsl #3 + subne r8, r8, #1 + subne r7, r8, r7 @ if (code&0x1000) row=(height<<3)-1-row; // Flip Y add r8, r9, r7, lsr #3 @ tile+=row>>3; // Tile number increases going down tst r9, #0x0800 @@ -1219,7 +1310,10 @@ DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites and r7, r7, #7 add r8, r8, r7, lsl #1 @ tile+=(row&7)<<1; // Tile address -.dspr_continue: + add r6, r6, #1 @ inc now + cmp r4, #0x1000000 @ check width of last sprite + movhs r6, r4, lsr #24 + @ cache some stuff to avoid mem access mov r5, r5, lsl #4 @ delta<<=4; // Delta of address and r4, r9, #0x6000 @@ -1227,9 +1321,8 @@ DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites orrs r9, r9, #0x10000000 @ r9=scc1 ???? ... 
(s=shadow/hilight, cc=pal) mov r3, r4, lsr #9 @ r3=pal=((code>>9)&0x30); - orrmi r3, r3, #0x40 @ for sh/hi + orrmi r3, r3, #0x80 @ for sh/hi - add r6, r6, #1 @ inc now adds r0, r2, #0 @ mov sx to r0 and set ZV flags b .dspr_loop_enter @@ -1244,10 +1337,8 @@ DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites cmp r0, #328 bge DrawSprite - mov r8, r8, lsl #17 - mov r8, r8, lsr #17 @ tile&=0x7fff; // Clip tile address - - ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels + bic r8, r8, #0xf8000 @ tile&=0x7fff; // Clip tile address + ldr r2, [lr, r8, lsl #1] @ pack=*(unsigned int *)(PicoMem.vram+addr); // Get 8 pixels add r1, r11, r0 @ r1=pdest tst r2, r2 beq .dspr_loop @@ -1255,6 +1346,9 @@ DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites cmp r12, r9, lsr #28 beq .dspr_shadow + tst r9, #0x80000000 + bne .dspr_shnonsh + cmp r2, r2, ror #4 beq .dspr_SingleColor @ tileline singlecolor @@ -1288,6 +1382,20 @@ DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites strneb r4, [r1], #1 b .dspr_loop +.dspr_shnonsh: + tst r9, #0x0800 + bne .dspr_TileFlipNonSH + + @ (r1=pdest, r2=pixels8, r3=pal) r4: scratch, r12: helper pattern +@ scratch: r4, r7 +.dspr_TileNormNonSH: + TileNormNonSH r12 + b .dspr_loop + +.dspr_TileFlipNonSH: + TileFlipNonSH r12 + b .dspr_loop + .dspr_shadow: cmp r2, r2, ror #4 beq .dspr_singlec_sh @@ -1307,21 +1415,28 @@ DrawSprite: @ was: unsigned int *sprite, int sh, int acc_sprites @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ -.global DrawWindow @ int tstart, int tend, int prio, int sh // int *hcache +@ void DrawWindow(int tstart, int tend, int prio, int sh +@ struct PicoEState *est) + +.global DrawWindow DrawWindow: + ldr r12, [sp] @ est stmfd sp!, {r4-r11,lr} - ldr r11, =(Pico+0x22228) @ Pico.video - ldr r10, =DrawScanline - ldrb r12, [r11, #3] @ pvid->reg[3] + ldr r6, [r12, #OFS_EST_Pico] + ldr r10, [r12, #OFS_EST_DrawScanline] + mov r11, r12 @ est + 
ldrb r12, [r6, #OFS_Pico_video_reg+3] @ pvid->reg[3] - ldr r10, [r10] - ldr r4, [r11, #12] + ldr r4, [r6, #OFS_Pico_video_reg+12] mov r5, r10, lsr #3 and r10, r10, #7 mov r10, r10, lsl #1 @ r10=ty + ldr r6, [r11, #OFS_EST_rendstatus] + ldr lr, [r11, #OFS_EST_PicoMem_vram] + mov r12, r12, lsl #10 tst r4, #1 @ 40 cell mode? @@ -1331,70 +1446,51 @@ DrawWindow: addeq r12, r12, r5, lsl #6 @ nametab add r12, r12, r0, lsl #2 @ +starttile - ldr r6, =rendstatus - ldr lr, =(Pico+0x10000) @ lr=Pico.vram - ldr r6, [r6] - - @ fetch the first code now - ldrh r7, [lr, r12] - ands r6, r6, #PDRAW_WND_DIFF_PRIO - orr r6, r6, r2 - - eoreq r8, r2, r7, lsr #15 @ do prio bits differ? - cmpeq r8, #1 + cmpeq r2, #1 @ prio && !(rendstatus & WND_DIFF_PRIO)? ldmeqfd sp!, {r4-r11,pc} @ yes, assume that whole window uses same priority + orr r6, r6, r2 orr r6, r6, r3, lsl #8 @ shadow mode sub r8, r1, r0 @ cache some stuff to avoid mem access -.if OVERRIDE_HIGHCOL - ldr r11,=HighCol - mov r8, r8, lsl #1 @ cells - ldr r11,[r11] - mvn r9, #0 @ r9=prevcode=-1 - add r11,r11,#8 -.else - ldr r11,=(HighCol+8) + ldr r11, [r11, #OFS_EST_HighCol] mov r8, r8, lsl #1 @ cells mvn r9, #0 @ r9=prevcode=-1 -.endif - add r1, r11, r0, lsl #4 @ r1=pdest + add r1, r11, r0, lsl #4 @ r1=pdest=HighCol+starttile (+8 added in loop) mov r0, #0xf - b .dwloop_enter @ r4,r5 are scratch in this loop .dwloop: add r1, r1, #8 .dwloop_nor1: - add r12, r12, #2 @ halfwords ldrh r7, [lr, r12] @ r7=code (int, but from unsigned, no sign extend) + add r12, r12, #2 @ halfwords subs r8, r8, #1 - beq .dwloop_end @ done + bmi .dwloop_end @ done eor r5, r6, r7, lsr #15 tst r5, #1 - orrne r6, r6, #2 @ wrong pri + orrne r6, r6, #PDRAW_WND_DIFF_PRIO @ wrong pri bne .dwloop cmp r7, r9 beq .dw_samecode @ we know stuff about this tile already -.dwloop_enter: mov r9, r7 @ remember code - movs r2, r9, lsl #20 @ if (code&0x1000) - mov r2, r2, lsl #1 - add r2, r10, r2, lsr #17 @ r2=addr=(code&0x7ff)<<4; addr+=ty - eorcs r2, r2, #0x0e @ if 
(code&0x1000) addr^=0xe; + tst r9, #0x1000 @ if (code&0x1000) + mov r2, r9, lsl #21 + add r2, r2, r10, lsl #17 + eorne r2, r2, #0xe<<17 @ if (code&0x1000) addr^=0xe; + + ldr r2, [lr, r2, lsr #16] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels and r3, r9, #0x6000 mov r3, r3, lsr #9 @ r3=pal=((code&0x6000)>>9); - ldr r2, [lr, r2, lsl #1] @ pack=*(unsigned int *)(Pico.vram+addr); // Get 8 pixels - .dw_samecode: tst r6, #0x100 bne .dw_shadow @@ -1428,10 +1524,10 @@ DrawWindow: .dw_shadow: tst r6, #1 @ hi pri? - orreq r3, r3, #0x40 + orreq r3, r3, #0x80 beq .dw_shadow_done ldr r4, [r1] - mov r5, #0x3f + mov r5, #0x7f orr r5, r5, r5, lsl #8 orr r5, r5, r5, lsl #16 and r4, r4, r5 @@ -1442,154 +1538,14 @@ DrawWindow: b .dw_shadow_done .dwloop_end: - ldr r0, =rendstatus - ldr r1, [r0] - and r6, r6, #PDRAW_WND_DIFF_PRIO - orr r1, r1, r6 - str r1, [r0] + and r2, r6, #PDRAW_WND_DIFF_PRIO + ldmfd sp!, {r4-r11,lr} + ldr r0, [sp] + ldr r1, [r0, #OFS_EST_rendstatus] + orr r1, r1, r2 + str r1, [r0, #OFS_EST_rendstatus] - ldmfd sp!, {r4-r11,r12} - bx r12 - - -@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - - -@ hilights 2 pixels in RGB444/BGR444 format -.macro TileDoShHi2Pixels444 reg - mov \reg, \reg, ror #12 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #28 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #28 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #24 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #28 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #28 - adds \reg, \reg, #0x40000000 - orrcs \reg, \reg, #0xf0000000 - mov \reg, \reg, ror #12 -.endm - - -.global FinalizeLineBGR444 @ int sh - -FinalizeLineBGR444: - stmfd sp!, {r4-r6,lr} - mov r6, r0 - ldr lr, =(Pico+0x22228) @ Pico.video - ldr r0, =DrawLineDest - ldrb r12, [lr, #12] - ldr r0, [r0] 
- sub r3, lr, #0x128 @ r3=Pico.cram - - tst r12, #1 - movne r2, #320/4 @ len - bne .fl_no32colBGR444 - ldr r4, =PicoOpt - mov r2, #256/4 - ldr r4, [r4] - tst r4, #0x100 - addeq r0, r0, #32*2 - -.fl_no32colBGR444: - tst r6, r6 - beq .fl_noshBGR444 - - ldr r4, =HighPal - - ldrb r12, [lr, #-0x1a] @ 0x2220e ~ dirtyPal - tst r12, r12 - moveq r3, r4 - beq .fl_noshBGR444 - mov r12, #0 - strb r12, [lr, #-0x1a] - - mov lr, #0x40/8 - @ copy pal: -.fl_loopcpBGR444: - ldmia r3!, {r1,r5,r6,r12} - subs lr, lr, #1 - stmia r4!, {r1,r5,r6,r12} - bne .fl_loopcpBGR444 - - @ shadowed pixels: - mov r12, #0x0077 - orr r12,r12,#0x0700 - orr r12,r12,r12,lsl #16 - sub r3, r3, #0x40*2 - add r5, r4, #0x80*2 - mov lr, #0x40/4 -.fl_loopcpBGR444_sh: - ldmia r3!, {r1,r6} - subs lr, lr, #1 - and r1, r12, r1, lsr #1 - and r6, r12, r6, lsr #1 - stmia r4!, {r1,r6} - stmia r5!, {r1,r6} - bne .fl_loopcpBGR444_sh - - @ hilighted pixels: - sub r3, r3, #0x40*2 - mov lr, #0x40/2 -.fl_loopcpBGR444_hi: - ldr r1, [r3], #4 - TileDoShHi2Pixels444 r1 - str r1, [r4], #4 - subs lr, lr, #1 - bne .fl_loopcpBGR444_hi - - sub r3, r4, #0x40*3*2 - mov r6, #1 - - -.fl_noshBGR444: - ldr r12,=rendstatus - eors r6, r6, #1 @ sh is 0 - ldr r12,[r12] - mov lr, #0xff - tstne r12,#PDRAW_ACC_SPRITES - -.if OVERRIDE_HIGHCOL - ldr r1, =HighCol - movne lr, #0x3f - ldr r1, [r1] - mov lr, lr, lsl #1 - add r1, r1, #8 -.else - ldr r1, =(HighCol+8) - movne lr, #0x3f - mov lr, lr, lsl #1 -.endif - -.fl_loopBGR444: - ldr r12, [r1], #4 - subs r2, r2, #1 - - and r4, lr, r12, lsl #1 - ldrh r4, [r3, r4] - and r5, lr, r12, lsr #7 - ldrh r5, [r3, r5] - and r6, lr, r12, lsr #15 - ldrh r6, [r3, r6] - and r12,lr, r12, lsr #23 - ldrh r12,[r3, r12] @ 1c.i. 
- orr r4, r4, r5, lsl #16 - orr r5, r6, r12,lsl #16 - - stmia r0!, {r4,r5} - bne .fl_loopBGR444 - - - ldmfd sp!, {r4-r6,lr} - bx lr + bx lr @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -1640,26 +1596,26 @@ vidConvCpyRGB565: @ void *to, void *from, int pixels orr r8, r8, #0x0800 orr r8, r8, r8, lsl #16 vidConvCpyRGB565_local - ldmfd sp!, {r4-r9,lr} - bx lr + ldmfd sp!, {r4-r9,pc} -.global PicoDoHighPal555 @ int sh +@ void PicoDoHighPal555(int sh, int line, struct PicoEState *est) + +.global PicoDoHighPal555 PicoDoHighPal555: - stmfd sp!, {r4-r9,lr} - mov r1, #0 - ldr r8, =(Pico+0x22228) @ Pico.video + stmfd sp!, {r4-r10,lr} + mov r10,r2 @ est + ldr r8, [r10, #OFS_EST_Pico] -PicoDoHighPal555_nopush: - orr r9, r1, r0, lsl #31 @ 0:called from FinalizeLine555, 31: s/h + mov r9, r0 - ldr r0, =HighPal + add r0, r10, #OFS_EST_HighPal mov r1, #0 - strb r1, [r8, #-0x1a] @ 0x2220e ~ dirtyPal + strb r1, [r8, #OFS_Pico_m_dirtyPal] - sub r1, r8, #0x128 @ r1=Pico.cram + ldr r1, [r10, #OFS_EST_PicoMem_cram] mov r2, #0x40 mov r8, #0x0061 orr r8, r8, #0x0800 @@ -1667,32 +1623,19 @@ PicoDoHighPal555_nopush: vidConvCpyRGB565_local - tst r9, #(1<<31) + cmp r9, #0 beq PicoDoHighPal555_end - ldr r3, =HighPal - - @ shadowed pixels: - mov r12, #0x008e + add r3, r10, #OFS_EST_HighPal add r4, r3, #0x40*2 - orr r12,r12,#0x7300 - add r5, r3, #0xc0*2 - orr r12,r12,r12,lsl #16 - mov lr, #0x40/4 -.fl_loopcpRGB555_sh: - ldmia r3!, {r1,r6} - subs lr, lr, #1 - and r1, r12, r1, lsr #1 - and r6, r12, r6, lsr #1 - stmia r4!, {r1,r6} - stmia r5!, {r1,r6} - bne .fl_loopcpRGB555_sh - @ hilighted pixels: + @ hilighted pixels (0x40-0x7f): @ t = ((dpal[i] >> 1) & 0x738e738e) + 0x738e738e; @ t |= (t >> 4) & 0x08610861; @ r8=0x08610861 - sub r3, r3, #0x40*2 + mov r12, #0x008e + orr r12,r12,#0x7300 + orr r12,r12,r12,lsl #16 mov lr, #0x40/4 .fl_loopcpRGB555_hi: ldmia r3!, {r1,r6} @@ -1707,65 +1650,93 @@ PicoDoHighPal555_nopush: stmia r4!, {r1,r6} subs lr, lr, #1 bne 
.fl_loopcpRGB555_hi + + sub r3, r3, #0x40*2 + @ shadowed (0x80-0xbf), shadow|hilight (aka normal, 0xc0-0xff) pixels: + add r5, r3, #0xc0*2 + mov lr, #0x40/4 +.fl_loopcpRGB555_sh: + ldmia r3!, {r1,r6} + subs lr, lr, #1 + stmia r5!, {r1,r6} @ 0xc0, normal + and r1, r12, r1, lsr #1 + and r6, r12, r6, lsr #1 + stmia r4!, {r1,r6} + bne .fl_loopcpRGB555_sh + mov r0, #1 - PicoDoHighPal555_end: - tst r9, #1 - ldmeqfd sp!, {r4-r9,pc} - - ldr r8, =(Pico+0x22228) @ Pico.video - b FinalizeLineRGB555_pal_done + ldmfd sp!, {r4-r10,pc} -.global FinalizeLine555 @ int sh +@ void FinalizeLine555(int sh, int line, struct PicoEState *est) + +.global FinalizeLine555 FinalizeLine555: - stmfd sp!, {r4-r9,lr} - ldr r8, =(Pico+0x22228) @ Pico.video + ldr r3, [r2, #OFS_EST_rendstatus] + mov r0, r2 + tst r3, #PDRAW_BGC_DMA + bne BgcDMA - ldrb r2, [r8, #-0x1a] @ 0x2220e ~ dirtyPal - mov r1, #1 - tst r2, r2 - bne PicoDoHighPal555_nopush + stmfd sp!, {r4-r11,lr} + mov r11,r2 @ est + mov r4, r3 -FinalizeLineRGB555_pal_done: - ldr r3, =HighPal + bl PicoDrawUpdateHighPal + + ldr r8, [r11, #OFS_EST_Pico] + add r3, r11, #OFS_EST_HighPal - ldr r12,=rendstatus - eors r0, r0, #1 @ sh is 0 - ldr r12,[r12] mov lr, #0xff - tstne r12,#PDRAW_ACC_SPRITES - movne lr, #0x3f - -.if OVERRIDE_HIGHCOL - ldr r1, =HighCol - ldr r0, =DrawLineDest - ldr r1, [r1] - ldr r0, [r0] - add r1, r1, #8 -.else - ldr r0, =DrawLineDest - ldr r1, =(HighCol+8) - ldr r0, [r0] -.endif - - ldrb r12, [r8, #12] mov lr, lr, lsl #1 - tst r12, #1 - movne r2, #320/8 @ len - bne .fl_no32colRGB555 - ldr r4, =PicoOpt - mov r2, #256/8 - ldr r4, [r4] - tst r4, #0x4000 - bne .fl_32scale_RGB555 - tst r4, #0x0100 - addeq r0, r0, #32*2 + ldr r5, [r11, #OFS_EST_PicoOpt] + ldr r1, [r11, #OFS_EST_HighCol] + ldr r0, [r11, #OFS_EST_DrawLineDest] + ldr r7, [r5, #OFS_PicoIn_AHW-OFS_PicoIn_opt] + ldrb r12,[r8, #OFS_Pico_video_reg+12] + ldrb r6, [r8, #OFS_Pico_video_reg+0] + ldr r2, [r8, #OFS_Pico_m_hardware] + add r1, r1, #8 -.fl_no32colRGB555: + tst r7, 
#0x20 @ GG ? + tstne r2, #0x2 @ LCD ? + bne .fl_gg20col + tst r7, #0x10 @ SMS ? + beq .fl_noSMS + + tst r6, #0x20 + movne r2, #248/8 @ len = 248 + addne r1, r1, #8 @ ps += 8 + moveq r2, #256/8 @ len = 256 + b .fl_check32scaling + +.fl_gg20col: + mov r2, #160/8 @ len = 160 + tst r4, #PDRAW_SOFTSCALE + bne .fl_20scale_RGB555 @ scale 160->320 + b .fl_checkborder + +.fl_noSMS: + tst r12, #1 @ h32? + movne r2, #320/8 @ len = 320 + bne .fl_40colRGB555 + mov r2, #256/8 @ len = 256 + +.fl_check32scaling: + tst r4, #PDRAW_SOFTSCALE + rsbne r7, r2, #256/8 + addne r0, r0, r7, lsl #3 @ pd += (256-len)>>1 + bne .fl_32scale_RGB555 @ scale 256->320 + +.fl_checkborder: + tst r4, #PDRAW_BORDER_32 + rsbne r7, r2, #320/8 @ pd += (320-len)/2 + addne r0, r0, r7, lsl #3 + +.fl_40colRGB555: #ifdef UNALIGNED_DRAWLINEDEST @ this is basically for Gizmondo, which has unaligned odd lines in the framebuffer tst r0, #2 @@ -1804,21 +1775,174 @@ FinalizeLineRGB555_pal_done: stmia r0!, {r4,r5,r8,r12} bne .fl_loopRGB555 - ldmfd sp!, {r4-r9,lr} - bx lr + ldmfd sp!, {r4-r11,pc} .fl_32scale_RGB555: - stmfd sp!, {r10} - mov r9, #0x3900 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 - orr r9, r9, #0x00e7 + ldr r5, [r5, #OFS_PicoIn_filter-OFS_PicoIn_opt] + + mov r9, #0xf700 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 + orr r9, r9, #0x00de #ifdef UNALIGNED_DRAWLINEDEST tst r0, #2 bne .fl_32scale_RGB555u #endif -.fl_loop32scale_RGB555: + and r5, r5, #0x3 + add pc, pc, r5, lsl #2 + nop + b .fl_32scale_nn + b .fl_32scale_snn + b .fl_32scale_bl2 + b .fl_32scale_bl4 + +.fl_32scale_nn: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12, lsr #7 + ldrh r5, [r3, r5] + and r6, lr, r12, lsr #15 + ldrh r6, [r3, r6] + and r10,lr, r12, lsr #23 + ldrh r10,[r3, r10] + + orr r4, r4, r5, lsl #16 + orr r5, r6, r6, lsl #16 + + and r6, lr, r7, lsl #1 + ldrh r6, [r3, r6] + and r8, lr, r7, lsr #7 + ldrh r8, [r3, r8] + and r12,lr, r7, lsr #15 + ldrh r12,[r3, r12] + 
and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] + + orr r6, r10,r6, lsl #16 + orr r8, r8,r12, lsl #16 + + subs r2, r2, #1 + + orr r10,r12,r7, lsl #16 + + stmia r0!, {r4,r5,r6,r8,r10} + bne .fl_32scale_nn + + b .fl_32scale_8bit + +.fl_32scale_snn: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12, lsr #7 + ldrh r5, [r3, r5] + and r6, lr, r12, lsr #15 + ldrh r6, [r3, r6] + and r10,lr, r12, lsr #23 + ldrh r10,[r3, r10] + + and r4, r4, r9 + and r5, r5, r9 + orr r4, r4, r5, lsl #16 + and r6, r6, r9 + add r5, r5, r6 + mov r5, r5, lsr #1 + orr r5, r5, r6, lsl #16 + + and r6, lr, r7, lsl #1 + ldrh r6, [r3, r6] + and r8, lr, r7, lsr #7 + ldrh r8, [r3, r8] + and r12,lr, r7, lsr #15 + ldrh r12,[r3, r12] + and r7, lr, r7, lsr #23 + ldrh r7, [r3, r7] + + and r6, r6, r9 + and r10,r10,r9 + orr r6, r10,r6, lsl #16 + and r12,r12,r9 + and r7, r7, r9 + orr r10,r12,r7, lsl #16 + + and r8, r8, r9 + add r12,r12,r8 + mov r12,r12,lsr #1 + orr r8, r8,r12, lsl #16 + + subs r2, r2, #1 + + stmia r0!, {r4,r5,r6,r8,r10} + bne .fl_32scale_snn + + b .fl_32scale_8bit + +.fl_32scale_bl2: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12, lsr #7 + ldrh r5, [r3, r5] + and r6, lr, r12, lsr #15 + ldrh r6, [r3, r6] + + and r4, r4, r9 + and r5, r5, r9 + add r10,r4, r5 + mov r10,r10,lsr #1 + orr r4, r4, r10,lsl #16 @ px0 | (px0+px1)/2 + + and r6, r6, r9 + add r5, r5, r6 + mov r5, r5, lsr #1 + orr r5, r5, r6, lsl #16 @ (px1+px2)/2 | px2 + + and r10,lr, r12, lsr #23 + ldrh r10,[r3, r10] + and r8, lr, r7, lsl #1 + ldrh r8, [r3, r8] + + and r10,r10,r9 + and r8, r8, r9 + orr r6, r10,r8, lsl #16 @ px3 | px4 + + and r12,lr, r7, lsr #15 + ldrh r12,[r3, r12] + and r10, lr, r7, lsr #23 + ldrh r10, [r3, r10] + and r7, lr, r7, lsr #7 + ldrh r7, [r3, r7] + + and r12,r12,r9 + and r10,r10,r9 + orr r10,r12,r10, lsl #16 @ px6 | px7 + + and r7, r7, r9 + add r12,r12,r7 + add r8, r8, r7 + mov r8, r8, lsr #1 + mov 
r12,r12,lsr #1 + orr r8, r8,r12, lsl #16 @ (px4+px5)/2 | (px5+px6)/2 + + subs r2, r2, #1 + + stmia r0!, {r4,r5,r6,r8,r10} + bne .fl_32scale_bl2 + + b .fl_32scale_8bit + +.fl_32scale_bl4: + // TODO this should reflect the bl4 C algorithm, but it doesn't, it's bln. + and r9, r9, r9, lsl #1 @ nuke 2 LSBs to avoid spilling for n/4 +.fl_32loop_bl4: ldr r12, [r1], #4 ldr r7, [r1], #4 @@ -1826,16 +1950,21 @@ FinalizeLineRGB555_pal_done: ldrh r4, [r3, r4] and r5, lr, r12,lsr #7 ldrh r5, [r3, r5] - and r4, r4, r9, lsl #2 + + @ r4 = 1/4px0+3/4px1 : px0 + and r4, r4, r9 orr r4, r4, r4, lsl #14 @ r4[31:16] = 1/4 pix_s 0 - and r5, r5, r9, lsl #2 + and r5, r5, r9 sub r6, r5, r5, lsr #2 @ r6 = 3/4 pix_s 1 add r4, r4, r6, lsl #16 @ pix_d 0, 1 + and r6, lr, r12,lsr #15 ldrh r6, [r3, r6] and r12,lr, r12,lsr #23 ldrh r12,[r3, r12] - and r6, r6, r9, lsl #2 + + @ r5 = 3/4px2+1/4px3 : (px1+px2)/2 + and r6, r6, r9 add r5, r5, r6 mov r5, r5, lsr #1 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 @@ -1843,36 +1972,218 @@ FinalizeLineRGB555_pal_done: and r6, lr, r7, lsl #1 ldrh r6, [r3, r6] - and r12,r12,r9, lsl #2 + and r12,r12,r9 add r5, r5, r12,lsl #14 @ pix_d 2, 3 - and r6, r6, r9, lsl #2 + + @ r6 = px4 : px3 + and r6, r6, r9 orr r6, r12,r6, lsl #16 @ pix_d 4, 5 + @ r8 = (px5+px6)/2 : 1/4px4+3/4px5 and r12,lr, r7, lsr #7 ldrh r12,[r3, r12] and r10,lr, r7, lsr #15 ldrh r10,[r3, r10] - and r12,r12,r9, lsl #2 + and r12,r12,r9 sub r8, r12,r12,lsr #2 @ r8 = 3/4 pix_s 1 add r8, r8, r6, lsr #18 + and r7, lr, r7, lsr #23 ldrh r7, [r3, r7] - and r10,r10,r9, lsl #2 + and r10,r10,r9 orr r8, r8, r10,lsl #15 add r8, r8, r12,lsl #15 @ pix_d 6, 7 + + @ r10 = px7 : 3/4px6+1/4px7 sub r10,r10,r10,lsr #2 @ r10= 3/4 pix_s 2 - and r7, r7, r9, lsl #2 + and r7, r7, r9 add r10,r10,r7, lsr #2 @ += 1/4 pix_s 3 orr r10,r10,r7, lsl #16 @ pix_d 8, 9 subs r2, r2, #1 stmia r0!, {r4,r5,r6,r8,r10} - bne .fl_loop32scale_RGB555 + bne .fl_32loop_bl4 + +.fl_32scale_8bit: + ldr r4, [r11, #OFS_EST_rendstatus] + add r0, r1, 
#320-256 + mov r2, #256/8 + tst r4, #PDRAW_32X_SCALE + ldmeqfd sp!, {r4-r11,pc} + mov lr, #0xff + +.fl_32scale_8bit_nn: + ldr r7, [r1, #-4]! + ldr r12, [r1, #-4]! + + and r4, lr, r12, lsl #0 + and r5, lr, r12, lsr #8 + and r6, lr, r12, lsr #16 + and r10,lr, r12, lsr #24 + + orr r4, r4, r5, lsl #8 + orr r5, r6, r6, lsl #8 + + and r6, lr, r7, lsl #0 + and r8, lr, r7, lsr #8 + and r12,lr, r7, lsr #16 + and r7, lr, r7, lsr #24 + + orr r6, r10,r6, lsl #8 + orr r8, r8,r12, lsl #8 + + subs r2, r2, #1 + + orr r10,r12,r7, lsl #8 + + strh r10, [r0, #-2]! + strh r8, [r0, #-2]! + strh r6, [r0, #-2]! + strh r5, [r0, #-2]! + strh r4, [r0, #-2]! + + bne .fl_32scale_8bit_nn + + ldmfd sp!, {r4-r11,pc} + + +.fl_20scale_RGB555: + ldr r5, [r5, #OFS_PicoIn_filter-OFS_PicoIn_opt] + + mov r9, #0xf700 @ f800 07e0 001f | e000 0780 001c | 3800 01e0 0007 + orr r9, r9, #0x00de + +#ifdef UNALIGNED_DRAWLINEDEST + tst r0, #2 + bne .fl_20scale_RGB555u +#endif + + and r5, r5, #0x2 + add pc, pc, r5, lsl #1 + nop + b .fl_20scale_nn + b .fl_20scale_bl2 + +.fl_20scale_nn: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12, lsr #7 + ldrh r5, [r3, r5] + and r6, lr, r12, lsr #15 + ldrh r6, [r3, r6] + and r8 ,lr, r12, lsr #23 + ldrh r8 ,[r3, r8] + + orr r4, r4, r4, lsl #16 + orr r5, r5, r5, lsl #16 + orr r6, r6, r6, lsl #16 + orr r8, r8, r8, lsl #16 + stmia r0!, {r4,r5,r6,r8} + + and r4, lr, r7, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r7, lsr #7 + ldrh r5, [r3, r5] + and r6 ,lr, r7, lsr #15 + ldrh r6 ,[r3, r6] + and r8, lr, r7, lsr #23 + ldrh r8, [r3, r8] + + orr r4, r4, r4, lsl #16 + orr r5, r5, r5, lsl #16 + orr r6, r6, r6, lsl #16 + orr r8, r8, r8, lsl #16 + stmia r0!, {r4,r5,r6,r8} + + subs r2, r2, #1 + bne .fl_20scale_nn + + ldmfd sp!, {r4-r11,pc} + + +.fl_20scale_bl2: + ldr r8, [r1] + and r8, lr, r8, lsl #1 + ldrh r8, [r3, r8] + and r8, r8, r9 + mov r8, r8, lsl #16 + +.fl_20loop_bl2: + ldr r12, [r1], #4 + ldr r7, [r1], #4 + + and r4, lr, r12, 
lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r12, lsr #7 + ldrh r5, [r3, r5] + and r6, lr, r12, lsr #15 + ldrh r6, [r3, r6] + + and r4, r4, r9 + add r10,r4, r8, lsr #16 + mov r10,r10,lsr #1 + orr r4, r10,r4, lsl #16 @ (px-1+px0)/2 | px0 + + and r8 ,lr, r12, lsr #23 + ldrh r8 ,[r3, r8] + + and r5, r5, r9 + add r10,r5, r4, lsr #16 + mov r10,r10,lsr #1 + orr r5, r10,r5, lsl #16 @ (px0 +px1)/2 | px1 + + and r6, r6, r9 + add r10,r6, r5, lsr #16 + mov r10,r10,lsr #1 + orr r6, r10,r6, lsl #16 @ (px1 +px2)/2 | px2 + + and r8, r8, r9 + add r10,r8, r6, lsr #16 + mov r10,r10,lsr #1 + orr r8, r10,r8, lsl #16 @ (px2 +px3)/2 | px3 + + stmia r0!, {r4,r5,r6,r8} + + and r4, lr, r7, lsl #1 + ldrh r4, [r3, r4] + and r5, lr, r7, lsr #7 + ldrh r5, [r3, r5] + and r6, lr, r7, lsr #15 + ldrh r6, [r3, r6] + + and r4, r4, r9 + add r10,r4, r8, lsr #16 + mov r10,r10,lsr #1 + orr r4, r10,r4, lsl #16 @ (px-1+px0)/2 | px0 + + and r8 ,lr, r7, lsr #23 + ldrh r8 ,[r3, r8] + + and r5, r5, r9 + add r10,r5, r4, lsr #16 + mov r10,r10,lsr #1 + orr r5, r10,r5, lsl #16 @ (px0 +px1)/2 | px1 + + and r6, r6, r9 + add r10,r6, r5, lsr #16 + mov r10,r10,lsr #1 + orr r6, r10,r6, lsl #16 @ (px1 +px2)/2 | px2 + + and r8, r8, r9 + add r10,r8, r6, lsr #16 + mov r10,r10,lsr #1 + orr r8, r10,r8, lsl #16 @ (px2 +px3)/2 | px3 + + subs r2, r2, #1 + stmia r0!, {r4,r5,r6,r8} + bne .fl_20loop_bl2 + + ldmfd sp!, {r4-r11,pc} - ldmfd sp!, {r10} - ldmfd sp!, {r4-r9,lr} - bx lr #ifdef UNALIGNED_DRAWLINEDEST @ unaligned versions of loops @@ -1917,8 +2228,7 @@ FinalizeLineRGB555_pal_done: strh r8, [r0], #2 - ldmfd sp!, {r4-r9,lr} - bx lr + ldmfd sp!, {r4-r11,pc} .fl_32scale_RGB555u: @@ -1934,10 +2244,10 @@ FinalizeLineRGB555_pal_done: ldrh r6, [r3, r6] and r5, lr, r12,lsr #7 ldrh r5, [r3, r5] - and r6, r6, r9, lsl #2 + and r6, r6, r9 orr r4, r4, r6, lsl #16 @ r4 = pix_d -1, 0 - and r5, r5, r9, lsl #2 + and r5, r5, r9 sub r8, r5, r5, lsr #2 @ r8 = 3/4 pix_s 1 add r6, r8, r6, lsr #2 @ r6 = (1/4 pix_s 0) + (3/4 pix_s 1) orr r5, r6, r5, 
lsl #15 @@ -1946,20 +2256,20 @@ FinalizeLineRGB555_pal_done: ldrh r6, [r3, r6] and r12,lr, r12,lsr #23 ldrh r12,[r3, r12] - and r6, r6, r9, lsl #2 + and r6, r6, r9 add r5, r5, r6, lsl #15 @ r5 = pix_d 1, 2 and r8, lr, r7, lsl #1 ldrh r8, [r3, r8] and r10,lr, r7, lsr #7 ldrh r10,[r3, r10] - and r12,r12,r9, lsl #2 + and r12,r12,r9 sub r6, r6, r6, lsr #2 @ r6 = 3/4 pix_s 2 add r6, r6, r12,lsr #2 orr r6, r6, r12,lsl #16 @ r6 = pix_d 3, 4 - and r8, r8, r9, lsl #2 - and r10,r10,r9, lsl #2 + and r8, r8, r9 + and r10,r10,r9 sub r12,r10,r10,lsr #2 @ r12 = 3/4 pix_s 5 orr r8, r8, r8, lsl #14 add r8, r8, r12,lsl #16 @ r8 = pix_d 5, 6 @@ -1967,12 +2277,12 @@ FinalizeLineRGB555_pal_done: ldrh r12,[r3, r12] and r7, lr, r7, lsr #23 ldrh r7, [r3, r7] - and r12,r12,r9, lsl #2 + and r12,r12,r9 add r10,r10,r12 mov r10,r10, lsr #1 sub r12,r12,r12,lsr #2 @ r12 = 3/4 pix_s 6 orr r10,r10,r12,lsl #16 - and r7, r7, r9, lsl #2 + and r7, r7, r9 add r10,r10,r7, lsl #14 @ r10 = pix_d 7, 8 subs r2, r2, #1 @@ -1983,9 +2293,7 @@ FinalizeLineRGB555_pal_done: strh r4, [r0], #2 - ldmfd sp!, {r10} - ldmfd sp!, {r4-r9,lr} - bx lr + ldmfd sp!, {r4-r11,pc} #endif /* UNALIGNED_DRAWLINEDEST */ @@ -1997,12 +2305,54 @@ FinalizeLineRGB555_pal_done: blockcpy: stmfd sp!, {r4,r5} - mov r2, r2, lsr #4 + cmp r0, r1 + bhs blockcpyhi + + subs r2, r2, #16 + blt blockcpy2 blockcpy_loop: ldmia r1!, {r3-r5,r12} - subs r2, r2, #1 + subs r2, r2, #16 stmia r0!, {r3-r5,r12} - bne blockcpy_loop + bge blockcpy_loop + +blockcpy2: + adds r2, r2, #16-4 + ldmltfd sp!, {r4,r5} + bxlt lr + +blockcpy_loop2: + ldr r3, [r1], #4 + subs r2, r2, #4 + str r3, [r0], #4 + bge blockcpy_loop2 + + ldmfd sp!, {r4,r5} + bx lr + +blockcpyhi: + add r0, r0, r2 + add r1, r1, r2 + + subs r2, r2, #16 + blt blockcpyhi2 +blockcpyhi_loop: + ldmdb r1!, {r3-r5,r12} + subs r2, r2, #16 + stmdb r0!, {r3-r5,r12} + bge blockcpyhi_loop + +blockcpyhi2: + adds r2, r2, #16-4 + ldmltfd sp!, {r4,r5} + bxlt lr + +blockcpyhi_loop2: + ldr r3, [r1, #-4]! 
+ subs r2, r2, #4 + str r3, [r0, #-4]! + bge blockcpyhi_loop2 + ldmfd sp!, {r4,r5} bx lr @@ -2013,16 +2363,64 @@ blockcpy_or: stmfd sp!, {r4-r6} orr r3, r3, r3, lsl #8 orr r3, r3, r3, lsl #16 - mov r2, r2, lsr #4 + cmp r0, r1 + bhs blockcpyhi_or + + subs r2, r2, #16 + blt blockcpy_or2 blockcpy_loop_or: ldmia r1!, {r4-r6,r12} - subs r2, r2, #1 + subs r2, r2, #16 orr r4, r4, r3 orr r5, r5, r3 orr r6, r6, r3 orr r12,r12,r3 stmia r0!, {r4-r6,r12} - bne blockcpy_loop_or + bge blockcpy_loop_or + +blockcpy_or2: + adds r2, r2, #16-4 + ldmltfd sp!, {r4-r6} + bxlt lr + +blockcpy_loop_or2: + ldr r4, [r1], #4 + subs r2, r2, #4 + orr r4, r4, r3 + str r4, [r0], #4 + bge blockcpy_loop_or2 + + ldmfd sp!, {r4-r6} + bx lr + +blockcpyhi_or: + add r0, r0, r2 + add r1, r1, r2 + + subs r2, r2, #16 + blt blockcpyhi_or2 +blockcpyhi_loop_or: + ldmdb r1!, {r4-r6,r12} + subs r2, r2, #16 + orr r4, r4, r3 + orr r5, r5, r3 + orr r6, r6, r3 + orr r12,r12,r3 + stmdb r0!, {r4-r6,r12} + bge blockcpyhi_loop_or + +blockcpyhi_or2: + adds r2, r2, #16-4 + ldmltfd sp!, {r4-r6} + bxlt lr + +blockcpyhi_loop_or2: + ldr r4, [r1, #-4]! + subs r2, r2, #4 + orr r4, r4, r3 + str r4, [r0, #-4]! + bge blockcpyhi_loop_or2 + ldmfd sp!, {r4-r6} bx lr diff --git a/pico/eeprom.c b/pico/eeprom.c index 16f6c05c..d727766d 100644 --- a/pico/eeprom.c +++ b/pico/eeprom.c @@ -42,14 +42,14 @@ static void EEPROM_write_do(unsigned int d) // ???? 
??la (l=SCL, a=SDA) { // we are started and SCL went high - next cycle scyc++; // pre-increment - if(SRam.eeprom_type) { + if(Pico.sv.eeprom_type) { // X24C02+ if((ssa&1) && scyc == 18) { scyc = 9; saddr++; // next address in read mode - /*if(SRam.eeprom_type==2) saddr&=0xff; else*/ saddr&=0x1fff; // mask + /*if(Pico.sv.eeprom_type==2) saddr&=0xff; else*/ saddr&=0x1fff; // mask } - else if(SRam.eeprom_type == 2 && scyc == 27) scyc = 18; + else if(Pico.sv.eeprom_type == 2 && scyc == 27) scyc = 18; else if(scyc == 36) scyc = 27; } else { // X24C01 @@ -63,29 +63,29 @@ static void EEPROM_write_do(unsigned int d) // ???? ??la (l=SCL, a=SDA) else if((sreg & 8) && (sreg & 2) && !(d&2)) { // we are started and SCL went low (falling edge) - if(SRam.eeprom_type) { + if(Pico.sv.eeprom_type) { // X24C02+ if(scyc == 9 || scyc == 18 || scyc == 27); // ACK cycles - else if( (SRam.eeprom_type == 3 && scyc > 27) || (SRam.eeprom_type == 2 && scyc > 18) ) { + else if( (Pico.sv.eeprom_type == 3 && scyc > 27) || (Pico.sv.eeprom_type == 2 && scyc > 18) ) { if(!(ssa&1)) { // data write - unsigned char *pm=SRam.data+saddr; + unsigned char *pm=Pico.sv.data+saddr; *pm <<= 1; *pm |= d&1; if(scyc == 26 || scyc == 35) { saddr=(saddr&~0xf)|((saddr+1)&0xf); // only 4 (?) 
lowest bits are incremented elprintf(EL_EEPROM, "eeprom: write done, addr inc to: %x, last byte=%02x", saddr, *pm); } - SRam.changed = 1; + Pico.sv.changed = 1; } } else if(scyc > 9) { if(!(ssa&1)) { // we latch another addr bit saddr<<=1; - if(SRam.eeprom_type == 2) saddr&=0xff; else saddr&=0x1fff; // mask + if(Pico.sv.eeprom_type == 2) saddr&=0xff; else saddr&=0x1fff; // mask saddr|=d&1; if(scyc==17||scyc==26) { elprintf(EL_EEPROM, "eeprom: addr reg done: %x", saddr); - if(scyc==17&&SRam.eeprom_type==2) { saddr&=0xff; saddr|=(ssa<<7)&0x700; } // add device bits too + if(scyc==17&&Pico.sv.eeprom_type==2) { saddr&=0xff; saddr|=(ssa<<7)&0x700; } // add device bits too } } } else { @@ -99,13 +99,13 @@ static void EEPROM_write_do(unsigned int d) // ???? ??la (l=SCL, a=SDA) else if(scyc > 9) { if(!(saddr&1)) { // data write - unsigned char *pm=SRam.data+(saddr>>1); + unsigned char *pm=Pico.sv.data+(saddr>>1); *pm <<= 1; *pm |= d&1; if(scyc == 17) { saddr=(saddr&0xf9)|((saddr+2)&6); // only 2 lowest bits are incremented elprintf(EL_EEPROM, "eeprom: write done, addr inc to: %x, last byte=%02x", saddr>>1, *pm); } - SRam.changed = 1; + Pico.sv.changed = 1; } } else { // we latch another addr bit @@ -129,11 +129,11 @@ static void EEPROM_upd_pending(unsigned int d) sreg &= ~0xc0; // SCL - d1 = (d >> SRam.eeprom_bit_cl) & 1; + d1 = (d >> Pico.sv.eeprom_bit_cl) & 1; sreg |= d1 << 7; // SDA in - d1 = (d >> SRam.eeprom_bit_in) & 1; + d1 = (d >> Pico.sv.eeprom_bit_in) & 1; sreg |= d1 << 6; Pico.m.eeprom_status = (unsigned char) sreg; @@ -190,23 +190,23 @@ unsigned int EEPROM_read(void) } else if (scyc > 9 && scyc < 18) { // started and first command word received shift = 17-scyc; - if (SRam.eeprom_type) { + if (Pico.sv.eeprom_type) { // X24C02+ if (ssa&1) { elprintf(EL_EEPROM, "eeprom: read: addr %02x, cycle %i, reg %02x", saddr, scyc, sreg); - if (shift==0) elprintf(EL_EEPROM, "eeprom: read done, byte %02x", SRam.data[saddr]); - d = (SRam.data[saddr]>>shift)&1; + if (shift==0) 
elprintf(EL_EEPROM, "eeprom: read done, byte %02x", Pico.sv.data[saddr]); + d = (Pico.sv.data[saddr]>>shift)&1; } } else { // X24C01 if (saddr&1) { elprintf(EL_EEPROM, "eeprom: read: addr %02x, cycle %i, reg %02x", saddr>>1, scyc, sreg); - if (shift==0) elprintf(EL_EEPROM, "eeprom: read done, byte %02x", SRam.data[saddr>>1]); - d = (SRam.data[saddr>>1]>>shift)&1; + if (shift==0) elprintf(EL_EEPROM, "eeprom: read done, byte %02x", Pico.sv.data[saddr>>1]); + d = (Pico.sv.data[saddr>>1]>>shift)&1; } } } - return (d << SRam.eeprom_bit_out); + return (d << Pico.sv.eeprom_bit_out); } diff --git a/pico/m68kif_cyclone.s b/pico/m68kif_cyclone.s index a0a508cd..55e996a4 100644 --- a/pico/m68kif_cyclone.s +++ b/pico/m68kif_cyclone.s @@ -27,18 +27,18 @@ cyclone_checkpc: and r3, r0, #0xff000000 bic r0, r0, #1 bics r2, r0, #0xff000000 - beq crashed + @ ouf, some Codemasters titles actually start at address 0 + @ beq crashed ldr r1, [r7, #0x6c] @ read16 map mov r2, r2, lsr #M68K_MEM_SHIFT ldr r1, [r1, r2, lsl #2] - movs r1, r1, lsl #1 - bcs crashed - sub r1, r1, r3 - str r1, [r7, #0x60] @ membase - add r0, r0, r1 - bx lr + movs r1, r1, lsl #1 + subcc r1, r1, r3 + strcc r1, [r7, #0x60] @ membase + addcc r0, r0, r1 + bxcc lr crashed: stmfd sp!,{lr} @@ -82,24 +82,24 @@ cyclone_fetch32: ldr r1, [r1, r2, lsl #2] bic r0, r0, #1 movs r1, r1, lsl #1 - ldrcch r0, [r1, r0]! + ldrcch r2, [r1, r0]! 
ldrcch r1, [r1, #2] - orrcc r0, r1, r0, lsl #16 + orrcc r0, r1, r2, lsl #16 bxcc lr - stmfd sp!,{r0,r1,lr} + stmfd sp!,{r0,r1,r2,lr} mov lr, pc bx r1 mov r2, r0, lsl #16 - ldmia sp, {r0,r1} + ldmfd sp!, {r0,r1} str r2, [sp] add r0, r0, #2 mov lr, pc bx r1 - ldr r1, [sp] + ldmfd sp!, {r1,lr} mov r0, r0, lsl #16 orr r0, r1, r0, lsr #16 - ldmfd sp!,{r1,r2,pc} + bx lr cyclone_write8: @ u32 a, u8 d diff --git a/pico/media.c b/pico/media.c index 904693f9..51e3fbd8 100644 --- a/pico/media.c +++ b/pico/media.c @@ -8,7 +8,7 @@ #include #include "pico_int.h" -#include "cd/cue.h" +#include "cd/cd_parse.h" unsigned char media_id_header[0x100]; @@ -31,34 +31,50 @@ static void get_ext(const char *file, char *ext) strlwr_(ext); } -static int detect_media(const char *fname) +static int detect_media(const char *fname, const unsigned char *rom, unsigned int romsize) { static const short sms_offsets[] = { 0x7ff0, 0x3ff0, 0x1ff0 }; - static const char *sms_exts[] = { "sms", "gg", "sg" }; - static const char *md_exts[] = { "gen", "bin", "smd" }; - char buff0[32], buff[32]; - unsigned short *d16; - pm_file *pmf; - char ext[5]; + static const char *sms_exts[] = { "sms", "gg", "sg", "sc" }; + static const char *md_exts[] = { "gen", "smd", "md", "32x" }; + static const char *pico_exts[] = { "pco" }; + char buff0[512], buff[32]; + unsigned short *d16 = NULL; + pm_file *pmf = NULL; + const char *ext_ptr = NULL; + char ext[8]; int i; - get_ext(fname, ext); + ext[0] = '\0'; + if ((ext_ptr = strrchr(fname, '.'))) { + strncpy(ext, ext_ptr + 1, sizeof(ext)); + ext[sizeof(ext) - 1] = '\0'; + } // detect wrong extensions - if (!strcmp(ext, ".srm") || !strcmp(ext, "s.gz") || !strcmp(ext, ".mds")) // s.gz ~ .mds.gz + if (!strcasecmp(ext, "srm") || !strcasecmp(ext, "gz")) // s.gz ~ .mds.gz return PM_BAD_DETECT; - /* don't believe in extensions, except .cue */ - if (strcasecmp(ext, ".cue") == 0) + /* don't believe in extensions, except .cue and .chd */ + if (strcasecmp(ext, "cue") == 0 || 
strcasecmp(ext, "chd") == 0) return PM_CD; - pmf = pm_open(fname); - if (pmf == NULL) - return PM_BAD_DETECT; + /* Open rom file, if required */ + if (!rom) { + pmf = pm_open(fname); + if (pmf == NULL) + return PM_BAD_DETECT; + romsize = pmf->size; + } - if (pm_read(buff0, 32, pmf) != 32) { - pm_close(pmf); - return PM_BAD_DETECT; + if (!rom) { + if (pm_read(buff0, 512, pmf) != 512) { + pm_close(pmf); + return PM_BAD_DETECT; + } + } else { + if (romsize < 512) + return PM_BAD_DETECT; + memcpy(buff0, rom, 512); } if (strncasecmp("SEGADISCSYSTEM", buff0 + 0x00, 14) == 0 || @@ -68,28 +84,53 @@ static int detect_media(const char *fname) } /* check for SMD evil */ - if (pmf->size >= 0x4200 && (pmf->size & 0x3fff) == 0x200) { - if (pm_seek(pmf, sms_offsets[0] + 0x200, SEEK_SET) == sms_offsets[0] + 0x200 && - pm_read(buff, 16, pmf) == 16 && - strncmp("TMR SEGA", buff, 8) == 0) + if (romsize >= 0x4200 && (romsize & 0x3fff) == 0x200) { + buff[0] = '\0'; + + if (!rom) { + if (pm_seek(pmf, sms_offsets[0] + 0x200, SEEK_SET) == sms_offsets[0] + 0x200) + pm_read(buff, 16, pmf); + } else { + if (romsize >= sms_offsets[0] + 0x200 + 16) + memcpy(buff, rom + sms_offsets[0] + 0x200, 16); + } + + if (strncmp("TMR SEGA", buff, 8) == 0) goto looks_like_sms; /* could parse further but don't bother */ goto extension_check; } - /* MD header? Act as TMSS BIOS here */ - if (pm_seek(pmf, 0x100, SEEK_SET) == 0x100 && pm_read(buff, 16, pmf) == 16) { - if (strncmp(buff, "SEGA", 4) == 0 || strncmp(buff, " SEG", 4) == 0) - goto looks_like_md; + /* fetch header info */ + memset(buff, '\0', 17); + if (!rom) { + if (pm_seek(pmf, 0x100, SEEK_SET) == 0x100) + pm_read(buff, 16, pmf); + } else { + if (romsize >= 0x100 + 16) + memcpy(buff, rom + 0x100, 16); } + /* PICO header? Almost always appropriately marked */ + if (strstr(buff, " PICO ")) + goto looks_like_pico; + /* MD header? 
Act as TMSS BIOS here */ + if (strncmp(buff, "SEGA", 4) == 0 || strncmp(buff, " SEG", 4) == 0) + goto looks_like_md; for (i = 0; i < ARRAY_SIZE(sms_offsets); i++) { - if (pm_seek(pmf, sms_offsets[i], SEEK_SET) != sms_offsets[i]) - continue; + if (!rom) { + if (pm_seek(pmf, sms_offsets[i], SEEK_SET) != sms_offsets[i]) + continue; - if (pm_read(buff, 16, pmf) != 16) - continue; + if (pm_read(buff, 16, pmf) != 16) + continue; + } else { + if (romsize < sms_offsets[i] + 16) + continue; + + memcpy(buff, rom + sms_offsets[i], 16); + } if (strncmp("TMR SEGA", buff, 8) == 0) goto looks_like_sms; @@ -97,20 +138,31 @@ static int detect_media(const char *fname) extension_check: /* probably some headerless thing. Maybe check the extension after all. */ + ext_ptr = pmf && *pmf->ext ? pmf->ext : ext; + for (i = 0; i < ARRAY_SIZE(md_exts); i++) - if (strcasecmp(pmf->ext, md_exts[i]) == 0) + if (strcasecmp(ext_ptr, md_exts[i]) == 0) goto looks_like_md; for (i = 0; i < ARRAY_SIZE(sms_exts); i++) - if (strcasecmp(pmf->ext, sms_exts[i]) == 0) + if (strcasecmp(ext_ptr, sms_exts[i]) == 0) goto looks_like_sms; + for (i = 0; i < ARRAY_SIZE(pico_exts); i++) + if (strcasecmp(ext_ptr, pico_exts[i]) == 0) + goto looks_like_pico; + /* If everything else fails, make a guess on the reset vector */ d16 = (unsigned short *)(buff0 + 4); - if ((((d16[0] << 16) | d16[1]) & 0xffffff) >= pmf->size) { + if ((((d16[0] << 16) | d16[1]) & 0xffffff) >= romsize) { lprintf("bad MD reset vector, assuming SMS\n"); goto looks_like_sms; } + d16 = (unsigned short *)(buff0 + 0x1a0); + if ((((d16[0] << 16) | d16[1]) & 0xffffff) != 0) { + lprintf("bad MD rom start, assuming SMS\n"); + goto looks_like_sms; + } looks_like_md: pm_close(pmf); @@ -119,6 +171,10 @@ looks_like_md: looks_like_sms: pm_close(pmf); return PM_MARK3; + +looks_like_pico: + pm_close(pmf); + return PM_PICO; } /* checks if fname points to valid MegaCD image */ @@ -129,26 +185,31 @@ int PicoCdCheck(const char *fname_in, int *pregion) pm_file *cd_f; 
int region = 4; // 1: Japan, 4: US, 8: Europe char ext[5]; - cue_track_type type = CT_UNKNOWN; - cue_data_t *cue_data = NULL; + enum cd_track_type type = CT_UNKNOWN; + cd_data_t *cd_data = NULL; // opens a cue, or searches for one - cue_data = cue_parse(fname_in); - if (cue_data != NULL) { - fname = cue_data->tracks[1].fname; - type = cue_data->tracks[1].type; - } - else { + if (!cd_data && (cd_data = cue_parse(fname_in)) == NULL) { get_ext(fname_in, ext); if (strcasecmp(ext, ".cue") == 0) return -1; } + // opens a chd + if (!cd_data && (cd_data = chd_parse(fname_in)) == NULL) { + get_ext(fname_in, ext); + if (strcasecmp(ext, ".chd") == 0) + return -1; + } + + if (cd_data != NULL) { + // 1st track contains the code + fname = cd_data->tracks[1].fname; + type = cd_data->tracks[1].type; + } cd_f = pm_open(fname); - if (cue_data != NULL) - cue_destroy(cue_data); - - if (cd_f == NULL) return 0; // let the upper level handle this + cdparse_destroy(cd_data); + if (cd_f == NULL) return CT_UNKNOWN; // let the upper level handle this if (pm_read(buf, 32, cd_f) != 32) { pm_close(cd_f); @@ -192,34 +253,35 @@ int PicoCdCheck(const char *fname_in, int *pregion) } enum media_type_e PicoLoadMedia(const char *filename, + const unsigned char *rom, unsigned int romsize, const char *carthw_cfg_fname, const char *(*get_bios_filename)(int *region, const char *cd_fname), void (*do_region_override)(const char *media_filename)) { const char *rom_fname = filename; enum media_type_e media_type; - cd_img_type cd_img_type = CIT_NOT_CD; + enum cd_track_type cd_img_type = CT_UNKNOWN; + pm_file *rom_file = NULL; unsigned char *rom_data = NULL; unsigned int rom_size = 0; - pm_file *rom = NULL; int cd_region = 0; int ret; - media_type = detect_media(filename); + media_type = detect_media(filename, rom, romsize); if (media_type == PM_BAD_DETECT) goto out; - if ((PicoAHW & PAHW_MCD) && Pico_mcd != NULL) - Stop_CD(); + if ((PicoIn.AHW & PAHW_MCD) && Pico_mcd != NULL) + cdd_unload(); PicoCartUnload(); 
- PicoAHW = 0; - PicoQuirks = 0; + PicoIn.AHW = 0; + PicoIn.quirks = 0; if (media_type == PM_CD) { // check for MegaCD image cd_img_type = PicoCdCheck(filename, &cd_region); - if ((int)cd_img_type >= 0 && cd_img_type != CIT_NOT_CD) + if ((int)cd_img_type >= 0 && cd_img_type != CT_UNKNOWN) { // valid CD image, ask frontend for BIOS.. rom_fname = NULL; @@ -230,7 +292,7 @@ enum media_type_e PicoLoadMedia(const char *filename, goto out; } - PicoAHW |= PAHW_MCD; + PicoIn.AHW |= PAHW_MCD; } else { media_type = PM_BAD_CD; @@ -238,23 +300,26 @@ enum media_type_e PicoLoadMedia(const char *filename, } } else if (media_type == PM_MARK3) { - lprintf("detected SMS ROM\n"); - PicoAHW = PAHW_SMS; + PicoIn.AHW = PAHW_SMS; + } + else if (media_type == PM_PICO) { + PicoIn.AHW = PAHW_PICO; } - rom = pm_open(rom_fname); - if (rom == NULL) { - lprintf("Failed to open ROM"); - media_type = PM_ERROR; - goto out; + if (!rom) { + rom_file = pm_open(rom_fname); + if (rom_file == NULL) { + lprintf("Failed to open ROM\n"); + media_type = PM_ERROR; + goto out; + } } - ret = PicoCartLoad(rom, &rom_data, &rom_size, (PicoAHW & PAHW_SMS) ? 1 : 0); - pm_close(rom); + ret = PicoCartLoad(rom_file, rom, romsize, &rom_data, &rom_size, (PicoIn.AHW & PAHW_SMS) ? 
1 : 0); if (ret != 0) { - if (ret == 2) lprintf("Out of memory"); - else if (ret == 3) lprintf("Read failed"); - else lprintf("PicoCartLoad() failed."); + if (ret == 2) lprintf("Out of memory\n"); + else if (ret == 3) lprintf("Read failed\n"); + else lprintf("PicoCartLoad() failed.\n"); media_type = PM_ERROR; goto out; } @@ -266,7 +331,7 @@ enum media_type_e PicoLoadMedia(const char *filename, goto out; } - if (!(PicoAHW & PAHW_SMS)) { + if (!(PicoIn.AHW & PAHW_SMS)) { unsigned short *d = (unsigned short *)(rom_data + 4); if ((((d[0] << 16) | d[1]) & 0xffffff) >= (int)rom_size) { lprintf("bad reset vector\n"); @@ -276,36 +341,63 @@ enum media_type_e PicoLoadMedia(const char *filename, } // load config for this ROM (do this before insert to get correct region) - if (!(PicoAHW & PAHW_MCD)) { + if (!(PicoIn.AHW & PAHW_MCD)) { memcpy(media_id_header, rom_data + 0x100, sizeof(media_id_header)); if (do_region_override != NULL) do_region_override(filename); } + // simple test for GG. Do this here since m.hardware is nulled in Insert + if ((PicoIn.AHW & PAHW_SMS) && !PicoIn.hwSelect) { + const char *ext = NULL; + if (rom_file && (*rom_file->ext != '\0')) { + ext = rom_file->ext; + } + else if ((ext = strrchr(filename, '.'))) { + if (*(++ext) == '\0') { + ext = NULL; + } + } + if (ext && !strcasecmp(ext,"gg")) { + PicoIn.AHW |= PAHW_GG; + lprintf("detected GG ROM\n"); + } else if (ext && !strcasecmp(ext,"sg")) { + PicoIn.AHW |= PAHW_SG; + lprintf("detected SG-1000 ROM\n"); + } else if (ext && !strcasecmp(ext,"sc")) { + PicoIn.AHW |= PAHW_SC; + lprintf("detected SC-3000 ROM\n"); + } else + lprintf("detected SMS ROM\n"); + } + if (PicoCartInsert(rom_data, rom_size, carthw_cfg_fname)) { media_type = PM_ERROR; goto out; } rom_data = NULL; // now belongs to PicoCart - Pico.m.ncart_in = 0; // insert CD if it was detected - if (cd_img_type != CIT_NOT_CD) { - ret = Insert_CD(filename, cd_img_type); + Pico.m.ncart_in = 0; + if (cd_img_type != CT_UNKNOWN) { + ret = cdd_load(filename, 
cd_img_type); if (ret != 0) { PicoCartUnload(); media_type = PM_BAD_CD; goto out; } - Pico.m.ncart_in = 1; + if (Pico.romsize <= 0x20000) + Pico.m.ncart_in = 1; } - if (PicoQuirks & PQUIRK_FORCE_6BTN) + if (PicoIn.quirks & PQUIRK_FORCE_6BTN) PicoSetInputDevice(0, PICO_INPUT_PAD_6BTN); out: + if (rom_file) + pm_close(rom_file); if (rom_data) - free(rom_data); + PicoCartUnload(); return media_type; } diff --git a/pico/memory.c b/pico/memory.c index 4f38e5e2..91bb2a80 100644 --- a/pico/memory.c +++ b/pico/memory.c @@ -2,6 +2,7 @@ * memory handling * (c) Copyright Dave, 2004 * (C) notaz, 2006-2010 + * (C) irixxxx, 2019-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -20,7 +21,7 @@ uptr m68k_read16_map [0x1000000 >> M68K_MEM_SHIFT]; uptr m68k_write8_map [0x1000000 >> M68K_MEM_SHIFT]; uptr m68k_write16_map[0x1000000 >> M68K_MEM_SHIFT]; -static void xmap_set(uptr *map, int shift, int start_addr, int end_addr, +static void xmap_set(uptr *map, int shift, u32 start_addr, u32 end_addr, const void *func_or_mh, int is_func) { #ifdef __clang__ @@ -53,20 +54,68 @@ static void xmap_set(uptr *map, int shift, int start_addr, int end_addr, } } -void z80_map_set(uptr *map, int start_addr, int end_addr, +void z80_map_set(uptr *map, u16 start_addr, u16 end_addr, const void *func_or_mh, int is_func) { xmap_set(map, Z80_MEM_SHIFT, start_addr, end_addr, func_or_mh, is_func); +#ifdef _USE_CZ80 + if (!is_func) + Cz80_Set_Fetch(&CZ80, start_addr, end_addr, (FPTR)func_or_mh); +#endif } -void cpu68k_map_set(uptr *map, int start_addr, int end_addr, +void cpu68k_map_set(uptr *map, u32 start_addr, u32 end_addr, const void *func_or_mh, int is_func) { - xmap_set(map, M68K_MEM_SHIFT, start_addr, end_addr, func_or_mh, is_func); + xmap_set(map, M68K_MEM_SHIFT, start_addr, end_addr, func_or_mh, is_func & 1); +#ifdef EMU_F68K + // setup FAME fetchmap + if (!(is_func & 1)) + { + M68K_CONTEXT *ctx = is_func & 2 ? 
&PicoCpuFS68k : &PicoCpuFM68k; + int shiftout = 24 - FAMEC_FETCHBITS; + int i = start_addr >> shiftout; + uptr base = (uptr)func_or_mh - (i << shiftout); + for (; i <= (end_addr >> shiftout); i++) + ctx->Fetch[i] = base; + } +#endif } // more specialized/optimized function (does same as above) -void cpu68k_map_all_ram(int start_addr, int end_addr, void *ptr, int is_sub) +void cpu68k_map_read_mem(u32 start_addr, u32 end_addr, void *ptr, int is_sub) +{ + uptr *r8map, *r16map; + uptr addr = (uptr)ptr; + int shift = M68K_MEM_SHIFT; + int i; + + if (!is_sub) { + r8map = m68k_read8_map; + r16map = m68k_read16_map; + } else { + r8map = s68k_read8_map; + r16map = s68k_read16_map; + } + + addr -= start_addr; + addr >>= 1; + for (i = start_addr >> shift; i <= end_addr >> shift; i++) + r8map[i] = r16map[i] = addr; +#ifdef EMU_F68K + // setup FAME fetchmap + { + M68K_CONTEXT *ctx = is_sub ? &PicoCpuFS68k : &PicoCpuFM68k; + int shiftout = 24 - FAMEC_FETCHBITS; + i = start_addr >> shiftout; + addr = (uptr)ptr - (i << shiftout); + for (; i <= (end_addr >> shiftout); i++) + ctx->Fetch[i] = addr; + } +#endif +} + +void cpu68k_map_all_ram(u32 start_addr, u32 end_addr, void *ptr, int is_sub) { uptr *r8map, *r16map, *w8map, *w16map; uptr addr = (uptr)ptr; @@ -89,18 +138,87 @@ void cpu68k_map_all_ram(int start_addr, int end_addr, void *ptr, int is_sub) addr >>= 1; for (i = start_addr >> shift; i <= end_addr >> shift; i++) r8map[i] = r16map[i] = w8map[i] = w16map[i] = addr; +#ifdef EMU_F68K + // setup FAME fetchmap + { + M68K_CONTEXT *ctx = is_sub ? 
&PicoCpuFS68k : &PicoCpuFM68k; + int shiftout = 24 - FAMEC_FETCHBITS; + i = start_addr >> shiftout; + addr = (uptr)ptr - (i << shiftout); + for (; i <= (end_addr >> shiftout); i++) + ctx->Fetch[i] = addr; + } +#endif +} + +void cpu68k_map_read_funcs(u32 start_addr, u32 end_addr, u32 (*r8)(u32), u32 (*r16)(u32), int is_sub) +{ + uptr *r8map, *r16map; + uptr ar8 = (uptr)r8, ar16 = (uptr)r16; + int shift = M68K_MEM_SHIFT; + int i; + + if (!is_sub) { + r8map = m68k_read8_map; + r16map = m68k_read16_map; + } else { + r8map = s68k_read8_map; + r16map = s68k_read16_map; + } + + ar8 = (ar8 >> 1 ) | MAP_FLAG; + ar16 = (ar16 >> 1 ) | MAP_FLAG; + for (i = start_addr >> shift; i <= end_addr >> shift; i++) + r8map[i] = ar8, r16map[i] = ar16; +} + +void cpu68k_map_all_funcs(u32 start_addr, u32 end_addr, u32 (*r8)(u32), u32 (*r16)(u32), void (*w8)(u32, u32), void (*w16)(u32, u32), int is_sub) +{ + uptr *r8map, *r16map, *w8map, *w16map; + uptr ar8 = (uptr)r8, ar16 = (uptr)r16; + uptr aw8 = (uptr)w8, aw16 = (uptr)w16; + int shift = M68K_MEM_SHIFT; + int i; + + if (!is_sub) { + r8map = m68k_read8_map; + r16map = m68k_read16_map; + w8map = m68k_write8_map; + w16map = m68k_write16_map; + } else { + r8map = s68k_read8_map; + r16map = s68k_read16_map; + w8map = s68k_write8_map; + w16map = s68k_write16_map; + } + + ar8 = (ar8 >> 1 ) | MAP_FLAG; + ar16 = (ar16 >> 1 ) | MAP_FLAG; + aw8 = (aw8 >> 1 ) | MAP_FLAG; + aw16 = (aw16 >> 1 ) | MAP_FLAG; + for (i = start_addr >> shift; i <= end_addr >> shift; i++) + r8map[i] = ar8, r16map[i] = ar16, w8map[i] = aw8, w16map[i] = aw16; +} + +u32 PicoRead16_floating(u32 a) +{ + // faking open bus + u32 d = (Pico.m.rotate += 0x41); + d ^= (d << 5) ^ (d << 8); + if ((a & 0xff0000) == 0xa10000) return d; // MegaCD pulldowns don't work here curiously + return (PicoIn.AHW & PAHW_MCD) ? 
0x00 : d; // pulldown if MegaCD2 attached } static u32 m68k_unmapped_read8(u32 a) { elprintf(EL_UIO, "m68k unmapped r8 [%06x] @%06x", a, SekPc); - return 0; // assume pulldown, as if MegaCD2 was attached + return a < 0x400000 ? 0 : (u8)PicoRead16_floating(a); } static u32 m68k_unmapped_read16(u32 a) { elprintf(EL_UIO, "m68k unmapped r16 [%06x] @%06x", a, SekPc); - return 0; + return a < 0x400000 ? 0 : PicoRead16_floating(a); } static void m68k_unmapped_write8(u32 a, u32 d) @@ -113,7 +231,7 @@ static void m68k_unmapped_write16(u32 a, u32 d) elprintf(EL_UIO, "m68k unmapped w16 [%06x] %04x @%06x", a, d & 0xffff, SekPc); } -void m68k_map_unmap(int start_addr, int end_addr) +void m68k_map_unmap(u32 start_addr, u32 end_addr) { #ifdef __clang__ // workaround bug (segfault) in @@ -141,12 +259,14 @@ void m68k_map_unmap(int start_addr, int end_addr) m68k_write16_map[i] = (addr >> 1) | MAP_FLAG; } +#ifndef _ASM_MEMORY_C MAKE_68K_READ8(m68k_read8, m68k_read8_map) MAKE_68K_READ16(m68k_read16, m68k_read16_map) MAKE_68K_READ32(m68k_read32, m68k_read16_map) MAKE_68K_WRITE8(m68k_write8, m68k_write8_map) MAKE_68K_WRITE16(m68k_write16, m68k_write16_map) MAKE_68K_WRITE32(m68k_write32, m68k_write16_map) +#endif // ----------------------------------------------------------------- @@ -188,7 +308,7 @@ void cyclone_crashed(u32 pc, struct Cyclone *context) static u32 read_pad_3btn(int i, u32 out_bits) { - u32 pad = ~PicoPadInt[i]; // Get inverse of pad MXYZ SACB RLDU + u32 pad = ~PicoIn.padInt[i]; // Get inverse of pad MXYZ SACB RLDU u32 value; if (out_bits & 0x40) // TH @@ -202,7 +322,7 @@ static u32 read_pad_3btn(int i, u32 out_bits) static u32 read_pad_6btn(int i, u32 out_bits) { - u32 pad = ~PicoPadInt[i]; // Get inverse of pad MXYZ SACB RLDU + u32 pad = ~PicoIn.padInt[i]; // Get inverse of pad MXYZ SACB RLDU int phase = Pico.m.padTHPhase[i]; u32 value; @@ -212,9 +332,9 @@ static u32 read_pad_6btn(int i, u32 out_bits) } else if(phase == 3) { if (out_bits & 0x40) - return (pad & 0x30) | 
((pad >> 8) & 0xf); // ?1CB MXYZ + value = (pad & 0x30) | ((pad >> 8) & 0xf); // ?1CB MXYZ else - return ((pad & 0xc0) >> 2) | 0x0f; // ?0SA 1111 + value = ((pad & 0xc0) >> 2) | 0x0f; // ?0SA 1111 goto out; } @@ -228,6 +348,51 @@ out: return value; } +static u32 read_pad_team(int i, u32 out_bits) +{ + u32 pad; + int phase = Pico.m.padTHPhase[i]; + u32 value; + + switch (phase) { + case 0: + value = 0x03; + break; + case 1: + value = 0x0f; + break; + case 4: case 5: case 6: case 7: // controller IDs, all 3 btn for now + value = 0x00; + break; + case 8: case 10: case 12: case 14: + pad = ~PicoIn.padInt[(phase-8) >> 1]; + value = pad & 0x0f; // ?x?x RLDU + break; + case 9: case 11: case 13: case 15: + pad = ~PicoIn.padInt[(phase-8) >> 1]; + value = (pad & 0xf0) >> 4; // ?x?x SACB + break; + default: + value = 0; + break; + } + + value |= (out_bits & 0x40) | ((out_bits & 0x20)>>1); + return value; +} + +static u32 read_pad_4way(int i, u32 out_bits) +{ + u32 pad = (PicoMem.ioports[2] & 0x70) >> 4; + u32 value = 0; + + if (i == 0 && pad <= 3) + value = read_pad_3btn(pad, out_bits); + + value |= (out_bits & 0x40); + return value; +} + static u32 read_nothing(int i, u32 out_bits) { return 0xff; @@ -241,14 +406,28 @@ static port_read_func *port_readers[3] = { read_nothing }; +static int padTHLatency[3]; // TODO this should be in the save file structures + static NOINLINE u32 port_read(int i) { - u32 data_reg = Pico.ioports[i + 1]; - u32 ctrl_reg = Pico.ioports[i + 4] | 0x80; + u32 data_reg = PicoMem.ioports[i + 1]; + u32 ctrl_reg = PicoMem.ioports[i + 4] | 0x80; u32 in, out; out = data_reg & ctrl_reg; - out |= 0x7f & ~ctrl_reg; // pull-ups + + // pull-ups: should be 0x7f, but Decap Attack has a bug where it temp. + // disables output before doing TH-low read, so emulate RC filter for TH. + // Decap Attack reportedly doesn't work on Nomad but works on must + // other MD revisions (different pull-up strength?). 
+ u32 mask = 0x3f; + if (CYCLES_GE(padTHLatency[i], SekCyclesDone()+100)) + padTHLatency[i] = SekCyclesDone(); // kludge to cope with cycle wrap + if (CYCLES_GE(SekCyclesDone(), padTHLatency[i])) { + mask |= 0x40; + padTHLatency[i] = SekCyclesDone(); + } + out |= mask & ~ctrl_reg; in = port_readers[i](i, out); @@ -262,7 +441,10 @@ void PicoSetInputDevice(int port, enum input_device device) if (port < 0 || port > 2) return; - switch (device) { + if (port == 1 && port_readers[0] == read_pad_team) + func = read_nothing; + + else switch (device) { case PICO_INPUT_PAD_3BTN: func = read_pad_3btn; break; @@ -271,6 +453,14 @@ void PicoSetInputDevice(int port, enum input_device device) func = read_pad_6btn; break; + case PICO_INPUT_PAD_TEAM: + func = read_pad_team; + break; + + case PICO_INPUT_PAD_4WAY: + func = read_pad_4way; + break; + default: func = read_nothing; break; @@ -288,7 +478,7 @@ NOINLINE u32 io_ports_read(u32 a) case 1: d = port_read(0); break; case 2: d = port_read(1); break; case 3: d = port_read(2); break; - default: d = Pico.ioports[a]; break; // IO ports can be used as RAM + default: d = PicoMem.ioports[a]; break; // IO ports can be used as RAM } return d; } @@ -301,37 +491,62 @@ NOINLINE void io_ports_write(u32 a, u32 d) if (1 <= a && a <= 2) { Pico.m.padDelay[a - 1] = 0; - if (!(Pico.ioports[a] & 0x40) && (d & 0x40)) + if (port_readers[a - 1] == read_pad_team) { + if (d & 0x40) + Pico.m.padTHPhase[a - 1] = 0; + else if ((d^PicoMem.ioports[a]) & 0x60) + Pico.m.padTHPhase[a - 1]++; + } else if (port_readers[0] == read_pad_4way) { + if (a == 2 && ((PicoMem.ioports[a] ^ d) & 0x70)) + Pico.m.padTHPhase[0] = 0; + if (a == 1 && !(PicoMem.ioports[a] & 0x40) && (d & 0x40)) + Pico.m.padTHPhase[0]++; + } else if (!(PicoMem.ioports[a] & 0x40) && (d & 0x40)) Pico.m.padTHPhase[a - 1]++; } + // after switching TH to input there's a latency before the pullup value is + // read back as input (see Decap Attack, not in Samurai Showdown, 32x WWF Raw) + if (4 <= a && a <= 
5) { + if ((PicoMem.ioports[a] & 0x40) && !(d & 0x40) && !(PicoMem.ioports[a - 3] & 0x40)) + // latency after switching to input and output was low + padTHLatency[a - 4] = SekCyclesDone() + 25; + } + // certain IO ports can be used as RAM - Pico.ioports[a] = d; + PicoMem.ioports[a] = d; } -// lame.. static int z80_cycles_from_68k(void) { - return z80_cycle_aim - + cycles_68k_to_z80(SekCyclesDone() - last_z80_sync); + int m68k_cnt = SekCyclesDone() - Pico.t.m68c_frame_start; + return cycles_68k_to_z80(m68k_cnt); } void NOINLINE ctl_write_z80busreq(u32 d) { d&=1; d^=1; - elprintf(EL_BUSREQ, "set_zrun: %i->%i [%i] @%06x", Pico.m.z80Run, d, SekCyclesDone(), SekPc); + elprintf(EL_BUSREQ, "set_zrun: %i->%i [%u] @%06x", Pico.m.z80Run, d, SekCyclesDone(), SekPc); if (d ^ Pico.m.z80Run) { if (d) { - z80_cycle_cnt = z80_cycles_from_68k(); + Pico.t.z80c_aim = Pico.t.z80c_cnt = z80_cycles_from_68k() + 2; + Pico.t.z80c_cnt += Pico.t.z80_busdelay >> 8; + Pico.t.z80_busdelay &= 0xff; } else { - if ((PicoOpt&POPT_EN_Z80) && !Pico.m.z80_reset) { + if ((PicoIn.opt & POPT_EN_Z80) && !Pico.m.z80_reset) { + // Z80 grants bus after the current M cycle, even within an insn + // simulate this by accumulating the last insn overhang in busdelay + unsigned granted; pprof_start(m68k); PicoSyncZ80(SekCyclesDone()); pprof_end_sub(m68k); + granted = Pico.t.z80c_aim + 6; // M cycle is 3-6 cycles + Pico.t.z80_busdelay += (Pico.t.z80c_cnt - granted) << 8; + Pico.t.z80c_cnt = granted; } } Pico.m.z80Run = d; @@ -341,28 +556,41 @@ void NOINLINE ctl_write_z80busreq(u32 d) void NOINLINE ctl_write_z80reset(u32 d) { d&=1; d^=1; - elprintf(EL_BUSREQ, "set_zreset: %i->%i [%i] @%06x", Pico.m.z80_reset, d, SekCyclesDone(), SekPc); + elprintf(EL_BUSREQ, "set_zreset: %i->%i [%u] @%06x", Pico.m.z80_reset, d, SekCyclesDone(), SekPc); if (d ^ Pico.m.z80_reset) { if (d) { - if ((PicoOpt&POPT_EN_Z80) && Pico.m.z80Run) { + if ((PicoIn.opt & POPT_EN_Z80) && Pico.m.z80Run) { pprof_start(m68k); 
PicoSyncZ80(SekCyclesDone()); pprof_end_sub(m68k); } + Pico.t.z80_busdelay &= 0xff; // also resets bus request YM2612ResetChip(); timers_reset(); } else { - z80_cycle_cnt = z80_cycles_from_68k(); + Pico.t.z80c_aim = Pico.t.z80c_cnt = z80_cycles_from_68k() + 2; z80_reset(); } Pico.m.z80_reset = d; } } +static void psg_write_68k(u32 d) +{ + PsndDoPSG(z80_cycles_from_68k()); + SN76496Write(d); +} + +static void psg_write_z80(u32 d) +{ + PsndDoPSG(z80_cyclesDone()); + SN76496Write(d); +} + // ----------------------------------------------------------------- #ifndef _ASM_MEMORY_C @@ -371,21 +599,22 @@ void NOINLINE ctl_write_z80reset(u32 d) static u32 PicoRead8_sram(u32 a) { u32 d; - if (SRam.start <= a && a <= SRam.end && (Pico.m.sram_reg & SRR_MAPPED)) + if (Pico.sv.start <= a && a <= Pico.sv.end && (Pico.m.sram_reg & SRR_MAPPED)) { - if (SRam.flags & SRF_EEPROM) { + if (Pico.sv.flags & SRF_EEPROM) { d = EEPROM_read(); if (!(a & 1)) d >>= 8; + d &= 0xff; } else - d = *(u8 *)(SRam.data - SRam.start + a); + d = *(u8 *)(Pico.sv.data - Pico.sv.start + a); elprintf(EL_SRAMIO, "sram r8 [%06x] %02x @ %06x", a, d, SekPc); return d; } // XXX: this is banking unfriendly if (a < Pico.romsize) - return Pico.rom[a ^ 1]; + return Pico.rom[MEM_BE2(a)]; return m68k_unmapped_read8(a); } @@ -393,12 +622,12 @@ static u32 PicoRead8_sram(u32 a) static u32 PicoRead16_sram(u32 a) { u32 d; - if (SRam.start <= a && a <= SRam.end && (Pico.m.sram_reg & SRR_MAPPED)) + if (Pico.sv.start <= a && a <= Pico.sv.end && (Pico.m.sram_reg & SRR_MAPPED)) { - if (SRam.flags & SRF_EEPROM) + if (Pico.sv.flags & SRF_EEPROM) d = EEPROM_read(); else { - u8 *pm = (u8 *)(SRam.data - SRam.start + a); + u8 *pm = (u8 *)(Pico.sv.data - Pico.sv.start + a); d = pm[0] << 8; d |= pm[1]; } @@ -416,20 +645,20 @@ static u32 PicoRead16_sram(u32 a) static void PicoWrite8_sram(u32 a, u32 d) { - if (a > SRam.end || a < SRam.start || !(Pico.m.sram_reg & SRR_MAPPED)) { + if (a > Pico.sv.end || a < Pico.sv.start || 
!(Pico.m.sram_reg & SRR_MAPPED)) { m68k_unmapped_write8(a, d); return; } elprintf(EL_SRAMIO, "sram w8 [%06x] %02x @ %06x", a, d & 0xff, SekPc); - if (SRam.flags & SRF_EEPROM) + if (Pico.sv.flags & SRF_EEPROM) { EEPROM_write8(a, d); } else { - u8 *pm = (u8 *)(SRam.data - SRam.start + a); + u8 *pm = (u8 *)(Pico.sv.data - Pico.sv.start + a); if (*pm != (u8)d) { - SRam.changed = 1; + Pico.sv.changed = 1; *pm = (u8)d; } } @@ -437,22 +666,25 @@ static void PicoWrite8_sram(u32 a, u32 d) static void PicoWrite16_sram(u32 a, u32 d) { - if (a > SRam.end || a < SRam.start || !(Pico.m.sram_reg & SRR_MAPPED)) { + if (a > Pico.sv.end || a < Pico.sv.start || !(Pico.m.sram_reg & SRR_MAPPED)) { m68k_unmapped_write16(a, d); return; } elprintf(EL_SRAMIO, "sram w16 [%06x] %04x @ %06x", a, d & 0xffff, SekPc); - if (SRam.flags & SRF_EEPROM) + if (Pico.sv.flags & SRF_EEPROM) { EEPROM_write16(d); } else { - // XXX: hardware could easily use MSB too.. - u8 *pm = (u8 *)(SRam.data - SRam.start + a); - if (*pm != (u8)d) { - SRam.changed = 1; - *pm = (u8)d; + u8 *pm = (u8 *)(Pico.sv.data - Pico.sv.start + a); + if (pm[0] != (u8)(d >> 8)) { + Pico.sv.changed = 1; + pm[0] = (u8)(d >> 8); + } + if (pm[1] != (u8)d) { + Pico.sv.changed = 1; + pm[1] = (u8)d; } } } @@ -461,19 +693,22 @@ static void PicoWrite16_sram(u32 a, u32 d) // TODO: verify mirrors VDP and bank reg (bank area mirroring verified) static u32 PicoRead8_z80(u32 a) { - u32 d = 0xff; - if ((Pico.m.z80Run & 1) || Pico.m.z80_reset) { + u32 d; + if ((Pico.m.z80Run | Pico.m.z80_reset | (z80_cycles_from_68k() < Pico.t.z80c_cnt)) && + !(PicoIn.quirks & PQUIRK_NO_Z80_BUS_LOCK)) { elprintf(EL_ANOMALY, "68k z80 read with no bus! [%06x] @ %06x", a, SekPc); - // open bus. Pulled down if MegaCD2 is attached. 
- return 0; + return (u8)PicoRead16_floating(a); } + SekCyclesBurnRun(1); - if ((a & 0x4000) == 0x0000) - d = Pico.zram[a & 0x1fff]; - else if ((a & 0x6000) == 0x4000) // 0x4000-0x5fff + if ((a & 0x4000) == 0x0000) { + d = PicoMem.zram[a & 0x1fff]; + } else if ((a & 0x6000) == 0x4000) // 0x4000-0x5fff d = ym2612_read_local_68k(); - else + else { elprintf(EL_UIO|EL_ANOMALY, "68k bad read [%06x] @%06x", a, SekPc); + d = (u8)PicoRead16_floating(a); + } return d; } @@ -485,26 +720,25 @@ static u32 PicoRead16_z80(u32 a) static void PicoWrite8_z80(u32 a, u32 d) { - if ((Pico.m.z80Run & 1) || Pico.m.z80_reset) { + if ((Pico.m.z80Run | Pico.m.z80_reset) && !(PicoIn.quirks & PQUIRK_NO_Z80_BUS_LOCK)) { // verified on real hw elprintf(EL_ANOMALY, "68k z80 write with no bus or reset! [%06x] %02x @ %06x", a, d&0xff, SekPc); return; } + SekCyclesBurnRun(1); if ((a & 0x4000) == 0x0000) { // z80 RAM - SekCyclesBurnRun(2); // FIXME hack - Pico.zram[a & 0x1fff] = (u8)d; + PicoMem.zram[a & 0x1fff] = (u8)d; return; } if ((a & 0x6000) == 0x4000) { // FM Sound - if (PicoOpt & POPT_EN_FM) - emustatus |= ym2612_write_local(a&3, d&0xff, 0)&1; + if (PicoIn.opt & POPT_EN_FM) + ym2612_write_local(a & 3, d & 0xff, 0); return; } // TODO: probably other VDP access too? Maybe more mirrors? 
if ((a & 0x7ff9) == 0x7f11) { // PSG Sound - if (PicoOpt & POPT_EN_PSG) - SN76496Write(d); + psg_write_68k(d); return; } if ((a & 0x7f00) == 0x6000) // Z80 BANK register @@ -537,28 +771,24 @@ u32 PicoRead8_io(u32 a) goto end; } - // faking open bus (MegaCD pulldowns don't work here curiously) - d = Pico.m.rotate++; - d ^= d << 6; + d = PicoRead16_floating(a); if ((a & 0xfc00) == 0x1000) { - // bit8 seems to be readable in this range - if (!(a & 1)) - d &= ~0x01; - if ((a & 0xff01) == 0x1100) { // z80 busreq (verified) - d |= (Pico.m.z80Run | Pico.m.z80_reset) & 1; - elprintf(EL_BUSREQ, "get_zrun: %02x [%i] @%06x", d, SekCyclesDone(), SekPc); + // bit8 seems to be readable in this range + if (!(a & 1)) { + d &= ~0x01; + // Z80 ahead of 68K only if in BUSREQ, BUSACK only after 68K reached Z80 + d |= (z80_cycles_from_68k() < Pico.t.z80c_cnt); + d |= (Pico.m.z80Run | Pico.m.z80_reset) & 1; + elprintf(EL_BUSREQ, "get_zrun: %02x [%u] @%06x", d, SekCyclesDone(), SekPc); + } } goto end; } - if (PicoOpt & POPT_EN_32X) { - d = PicoRead8_32x(a); - goto end; - } + d = PicoRead8_32x(a); - d = m68k_unmapped_read8(a); end: return d; } @@ -573,27 +803,21 @@ u32 PicoRead16_io(u32 a) goto end; } - // faking open bus - d = (Pico.m.rotate += 0x41); - d ^= (d << 5) ^ (d << 8); + d = PicoRead16_floating(a); // bit8 seems to be readable in this range if ((a & 0xfc00) == 0x1000) { - d &= ~0x0100; - if ((a & 0xff00) == 0x1100) { // z80 busreq + d &= ~0x0100; + d |= (z80_cycles_from_68k() < Pico.t.z80c_cnt) << 8; d |= ((Pico.m.z80Run | Pico.m.z80_reset) & 1) << 8; - elprintf(EL_BUSREQ, "get_zrun: %04x [%i] @%06x", d, SekCyclesDone(), SekPc); + elprintf(EL_BUSREQ, "get_zrun: %04x [%u] @%06x", d, SekCyclesDone(), SekPc); } goto end; } - if (PicoOpt & POPT_EN_32X) { - d = PicoRead16_32x(a); - goto end; - } + d = PicoRead16_32x(a); - d = m68k_unmapped_read16(a); end: return d; } @@ -618,12 +842,7 @@ void PicoWrite8_io(u32 a, u32 d) Pico.m.sram_reg |= (u8)(d & 3); return; } - if (PicoOpt & 
POPT_EN_32X) { - PicoWrite8_32x(a, d); - return; - } - - m68k_unmapped_write8(a, d); + PicoWrite8_32x(a, d); } void PicoWrite16_io(u32 a, u32 d) @@ -646,24 +865,34 @@ void PicoWrite16_io(u32 a, u32 d) Pico.m.sram_reg |= (u8)(d & 3); return; } - if (PicoOpt & POPT_EN_32X) { - PicoWrite16_32x(a, d); - return; - } - m68k_unmapped_write16(a, d); + PicoWrite16_32x(a, d); } #endif // _ASM_MEMORY_C // VDP area (0xc00000 - 0xdfffff) // TODO: verify if lower byte goes to PSG on word writes -static u32 PicoRead8_vdp(u32 a) +u32 PicoRead8_vdp(u32 a) { - if ((a & 0x00e0) == 0x0000) - return PicoVideoRead8(a); - - elprintf(EL_UIO|EL_ANOMALY, "68k bad read [%06x] @%06x", a, SekPc); - return 0; + u32 d; + if ((a & 0x00f0) == 0x0000) { + switch (a & 0x0d) + { + case 0x00: d = PicoVideoRead8DataH(0); break; + case 0x01: d = PicoVideoRead8DataL(0); break; + case 0x04: d = PicoVideoRead8CtlH(0); break; + case 0x05: d = PicoVideoRead8CtlL(0); break; + case 0x08: + case 0x0c: d = PicoVideoRead8HV_H(0); break; + case 0x09: + case 0x0d: d = PicoVideoRead8HV_L(0); break; + default: d = (u8)PicoRead16_floating(a); break; + } + } else { + elprintf(EL_UIO|EL_ANOMALY, "68k bad read [%06x] @%06x", a, SekPc); + d = (u8)PicoRead16_floating(a); + } + return d; } static u32 PicoRead16_vdp(u32 a) @@ -678,8 +907,7 @@ static u32 PicoRead16_vdp(u32 a) static void PicoWrite8_vdp(u32 a, u32 d) { if ((a & 0x00f9) == 0x0011) { // PSG Sound - if (PicoOpt & POPT_EN_PSG) - SN76496Write(d); + psg_write_68k(d); return; } if ((a & 0x00e0) == 0x0000) { @@ -694,8 +922,7 @@ static void PicoWrite8_vdp(u32 a, u32 d) static void PicoWrite16_vdp(u32 a, u32 d) { if ((a & 0x00f9) == 0x0010) { // PSG Sound - if (PicoOpt & POPT_EN_PSG) - SN76496Write(d); + psg_write_68k(d); return; } if ((a & 0x00e0) == 0x0000) { @@ -714,7 +941,7 @@ static void m68k_mem_setup(void); PICO_INTERNAL void PicoMemSetup(void) { - int mask, rs, a; + int mask, rs, sstart, a; // setup the memory map cpu68k_map_set(m68k_read8_map, 0x000000, 
0xffffff, m68k_unmapped_read8, 1); @@ -726,19 +953,21 @@ PICO_INTERNAL void PicoMemSetup(void) // align to bank size. We know ROM loader allocated enough for this mask = (1 << M68K_MEM_SHIFT) - 1; rs = (Pico.romsize + mask) & ~mask; + if (rs > 0xa00000) rs = 0xa00000; // max cartridge area cpu68k_map_set(m68k_read8_map, 0x000000, rs - 1, Pico.rom, 0); cpu68k_map_set(m68k_read16_map, 0x000000, rs - 1, Pico.rom, 0); // Common case of on-cart (save) RAM, usually at 0x200000-... - if ((SRam.flags & SRF_ENABLED) && SRam.data != NULL) { - rs = SRam.end - SRam.start; + if ((Pico.sv.flags & SRF_ENABLED) && Pico.sv.data != NULL) { + sstart = Pico.sv.start & ~mask; + rs = Pico.sv.end - sstart; rs = (rs + mask) & ~mask; - if (SRam.start + rs >= 0x1000000) - rs = 0x1000000 - SRam.start; - cpu68k_map_set(m68k_read8_map, SRam.start, SRam.start + rs - 1, PicoRead8_sram, 1); - cpu68k_map_set(m68k_read16_map, SRam.start, SRam.start + rs - 1, PicoRead16_sram, 1); - cpu68k_map_set(m68k_write8_map, SRam.start, SRam.start + rs - 1, PicoWrite8_sram, 1); - cpu68k_map_set(m68k_write16_map, SRam.start, SRam.start + rs - 1, PicoWrite16_sram, 1); + if (sstart + rs >= 0x1000000) + rs = 0x1000000 - sstart; + cpu68k_map_set(m68k_read8_map, sstart, sstart + rs - 1, PicoRead8_sram, 1); + cpu68k_map_set(m68k_read16_map, sstart, sstart + rs - 1, PicoRead16_sram, 1); + cpu68k_map_set(m68k_write8_map, sstart, sstart + rs - 1, PicoWrite8_sram, 1); + cpu68k_map_set(m68k_write16_map, sstart, sstart + rs - 1, PicoWrite16_sram, 1); } // Z80 region @@ -765,10 +994,10 @@ PICO_INTERNAL void PicoMemSetup(void) // RAM and it's mirrors for (a = 0xe00000; a < 0x1000000; a += 0x010000) { - cpu68k_map_set(m68k_read8_map, a, a + 0xffff, Pico.ram, 0); - cpu68k_map_set(m68k_read16_map, a, a + 0xffff, Pico.ram, 0); - cpu68k_map_set(m68k_write8_map, a, a + 0xffff, Pico.ram, 0); - cpu68k_map_set(m68k_write16_map, a, a + 0xffff, Pico.ram, 0); + cpu68k_map_set(m68k_read8_map, a, a + 0xffff, PicoMem.ram, 0); + 
cpu68k_map_set(m68k_read16_map, a, a + 0xffff, PicoMem.ram, 0); + cpu68k_map_set(m68k_write8_map, a, a + 0xffff, PicoMem.ram, 0); + cpu68k_map_set(m68k_write16_map, a, a + 0xffff, PicoMem.ram, 0); } // Setup memory callbacks: @@ -785,26 +1014,12 @@ PICO_INTERNAL void PicoMemSetup(void) PicoCpuCM68k.fetch32 = NULL; #endif #ifdef EMU_F68K - PicoCpuFM68k.read_byte = m68k_read8; - PicoCpuFM68k.read_word = m68k_read16; - PicoCpuFM68k.read_long = m68k_read32; - PicoCpuFM68k.write_byte = m68k_write8; - PicoCpuFM68k.write_word = m68k_write16; - PicoCpuFM68k.write_long = m68k_write32; - - // setup FAME fetchmap - { - int i; - // by default, point everything to first 64k of ROM - for (i = 0; i < M68K_FETCHBANK1; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom - (i<<(24-FAMEC_FETCHBITS)); - // now real ROM - for (i = 0; i < M68K_FETCHBANK1 && (i<<(24-FAMEC_FETCHBITS)) < Pico.romsize; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.rom; - // .. and RAM - for (i = M68K_FETCHBANK1*14/16; i < M68K_FETCHBANK1; i++) - PicoCpuFM68k.Fetch[i] = (unsigned long)Pico.ram - (i<<(24-FAMEC_FETCHBITS)); - } + PicoCpuFM68k.read_byte = (void *)m68k_read8; + PicoCpuFM68k.read_word = (void *)m68k_read16; + PicoCpuFM68k.read_long = (void *)m68k_read32; + PicoCpuFM68k.write_byte = (void *)m68k_write8; + PicoCpuFM68k.write_word = (void *)m68k_write16; + PicoCpuFM68k.write_long = (void *)m68k_write32; #endif #ifdef EMU_M68K m68k_mem_setup(); @@ -846,91 +1061,122 @@ static void m68k_mem_setup(void) static int get_scanline(int is_from_z80) { if (is_from_z80) { - int cycles = z80_cyclesDone(); - while (cycles - z80_scanline_cycles >= 228) - z80_scanline++, z80_scanline_cycles += 228; - return z80_scanline; + // ugh... compute by dividing cycles since frame start by cycles per line + // need some fractional resolution here, else there may be an extra line + int cycles_line = cycles_68k_to_z80((unsigned)(488.5*256))+1; // cycles per line, Q8 + int cycles_z80 = (z80_cyclesLeft<0 ? 
Pico.t.z80c_aim:z80_cyclesDone())<<8; + int cycles = cycles_line * Pico.t.z80_scanline; + // approximation by multiplying with inverse + if (cycles_z80 - cycles >= 4*cycles_line) { + // compute 1/cycles_line, storing the result to avoid future dividing + static int cycles_line_o, cycles_line_i; + if (cycles_line_o != cycles_line) + { cycles_line_o = cycles_line, cycles_line_i = (1<<22) / cycles_line; } + // compute lines = diff/cycles_line = diff*(1/cycles_line) + int lines = ((cycles_z80 - cycles) * cycles_line_i) >> 22; + Pico.t.z80_scanline += lines, cycles += cycles_line * lines; + } + // handle any rounding leftover + while (cycles_z80 - cycles >= cycles_line) + Pico.t.z80_scanline ++, cycles += cycles_line; + return Pico.t.z80_scanline; } return Pico.m.scanline; } +#define ym2612_update_status(xcycles) \ + ym2612.OPN.ST.status &= ~0x80; \ + ym2612.OPN.ST.status |= (xcycles < Pico.t.ym2612_busy) * 0x80; \ + if (xcycles >= Pico.t.timer_a_next_oflow) \ + ym2612.OPN.ST.status |= (ym2612.OPN.ST.mode >> 2) & 1; \ + if (xcycles >= Pico.t.timer_b_next_oflow) \ + ym2612.OPN.ST.status |= (ym2612.OPN.ST.mode >> 2) & 2 + /* probably should not be in this file, but it's near related code here */ void ym2612_sync_timers(int z80_cycles, int mode_old, int mode_new) { int xcycles = z80_cycles << 8; - /* check for overflows */ - if ((mode_old & 4) && xcycles > timer_a_next_oflow) - ym2612.OPN.ST.status |= 1; + // update timer status + ym2612_update_status(xcycles); - if ((mode_old & 8) && xcycles > timer_b_next_oflow) - ym2612.OPN.ST.status |= 2; - - /* update timer a */ + // update timer a if (mode_old & 1) - while (xcycles > timer_a_next_oflow) - timer_a_next_oflow += timer_a_step; + while (xcycles >= Pico.t.timer_a_next_oflow) + Pico.t.timer_a_next_oflow += Pico.t.timer_a_step; - if ((mode_old ^ mode_new) & 1) // turning on/off + // turning on/off + if ((mode_old ^ mode_new) & 1) { if (mode_old & 1) - timer_a_next_oflow = TIMER_NO_OFLOW; - else - timer_a_next_oflow = 
xcycles + timer_a_step; + Pico.t.timer_a_next_oflow = TIMER_NO_OFLOW; + else { + /* The internal tick of the YM2612 takes 144 clock cycles (with clock + * being OSC/7), or 67.2 z80 cycles. Timers are run once each tick. + * Starting a timer takes place at the next tick, so xcycles needs to be + * rounded up to that: t = next tick# = (xcycles / TICK_ZCYCLES) + 1 + */ + unsigned t = ((xcycles * (((1LL<<32)/TIMER_A_TICK_ZCYCLES)+1))>>32) + 1; + Pico.t.timer_a_next_oflow = t*TIMER_A_TICK_ZCYCLES + Pico.t.timer_a_step; + } } + if (mode_new & 1) - elprintf(EL_YMTIMER, "timer a upd to %i @ %i", timer_a_next_oflow>>8, z80_cycles); + elprintf(EL_YMTIMER, "timer a upd to %i @ %i", Pico.t.timer_a_next_oflow>>8, z80_cycles); - /* update timer b */ + // update timer b if (mode_old & 2) - while (xcycles > timer_b_next_oflow) - timer_b_next_oflow += timer_b_step; + while (xcycles >= Pico.t.timer_b_next_oflow) + Pico.t.timer_b_next_oflow += Pico.t.timer_b_step; + // turning on/off if ((mode_old ^ mode_new) & 2) { if (mode_old & 2) - timer_b_next_oflow = TIMER_NO_OFLOW; - else - timer_b_next_oflow = xcycles + timer_b_step; + Pico.t.timer_b_next_oflow = TIMER_NO_OFLOW; + else { + /* timer b has a divider of 16 which runs in its own counter. It is not + * reset by loading timer b. The first run of timer b after loading is + * therefore shorter by up to 15 ticks. + */ + unsigned t = ((xcycles * (((1LL<<32)/TIMER_A_TICK_ZCYCLES)+1))>>32) + 1; + int step = Pico.t.timer_b_step - TIMER_A_TICK_ZCYCLES*(t&15); + Pico.t.timer_b_next_oflow = t*TIMER_A_TICK_ZCYCLES + step; + } } + if (mode_new & 2) - elprintf(EL_YMTIMER, "timer b upd to %i @ %i", timer_b_next_oflow>>8, z80_cycles); + elprintf(EL_YMTIMER, "timer b upd to %i @ %i", Pico.t.timer_b_next_oflow>>8, z80_cycles); } // ym2612 DAC and timer I/O handlers for z80 static int ym2612_write_local(u32 a, u32 d, int is_from_z80) { + int cycles = is_from_z80 ? 
z80_cyclesDone() : z80_cycles_from_68k(); int addr; a &= 3; - if (a == 1 && ym2612.OPN.ST.address == 0x2a) /* DAC data */ - { - int scanline = get_scanline(is_from_z80); - //elprintf(EL_STATUS, "%03i -> %03i dac w %08x z80 %i", PsndDacLine, scanline, d, is_from_z80); - ym2612.dacout = ((int)d - 0x80) << 6; - if (PsndOut && ym2612.dacen && scanline >= PsndDacLine) - PsndDoDAC(scanline); - return 0; - } - switch (a) { case 0: /* address port 0 */ + case 2: /* address port 1 */ ym2612.OPN.ST.address = d; - ym2612.addr_A1 = 0; + ym2612.addr_A1 = (a & 2) >> 1; #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) YM2612Write_940(a, d, -1); + if (PicoIn.opt & POPT_EXT_FM) YM2612Write_940(a, d, -1); #endif return 0; case 1: /* data port 0 */ - if (ym2612.addr_A1 != 0) - return 0; - - addr = ym2612.OPN.ST.address; + case 3: /* data port 1 */ + addr = ym2612.OPN.ST.address | ((int)ym2612.addr_A1 << 8); ym2612.REGS[addr] = d; + // the busy flag in the YM2612 status is actually a 32 cycle timer + // (89.6 Z80 cycles), triggered by any write to the data port. + Pico.t.ym2612_busy = (cycles << 8) + YMBUSY_ZCYCLES; // Q8 for convenience + switch (addr) { case 0x24: // timer A High 8 @@ -939,42 +1185,35 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) : ((ym2612.OPN.ST.TA & 0x3fc)|(d&3)); if (ym2612.OPN.ST.TA != TAnew) { + ym2612_sync_timers(cycles, ym2612.OPN.ST.mode, ym2612.OPN.ST.mode); //elprintf(EL_STATUS, "timer a set %i", TAnew); ym2612.OPN.ST.TA = TAnew; //ym2612.OPN.ST.TAC = (1024-TAnew)*18; //ym2612.OPN.ST.TAT = 0; - timer_a_step = TIMER_A_TICK_ZCYCLES * (1024 - TAnew); - if (ym2612.OPN.ST.mode & 1) { - // this is not right, should really be done on overflow only - int cycles = is_from_z80 ? 
z80_cyclesDone() : z80_cycles_from_68k(); - timer_a_next_oflow = (cycles << 8) + timer_a_step; - } - elprintf(EL_YMTIMER, "timer a set to %i, %i", 1024 - TAnew, timer_a_next_oflow>>8); + Pico.t.timer_a_step = TIMER_A_TICK_ZCYCLES * (1024 - TAnew); + elprintf(EL_YMTIMER, "timer a set to %i, %i", 1024 - TAnew, Pico.t.timer_a_next_oflow>>8); } return 0; } case 0x26: // timer B if (ym2612.OPN.ST.TB != d) { + ym2612_sync_timers(cycles, ym2612.OPN.ST.mode, ym2612.OPN.ST.mode); //elprintf(EL_STATUS, "timer b set %i", d); ym2612.OPN.ST.TB = d; //ym2612.OPN.ST.TBC = (256-d) * 288; //ym2612.OPN.ST.TBT = 0; - timer_b_step = TIMER_B_TICK_ZCYCLES * (256 - d); // 262800 - if (ym2612.OPN.ST.mode & 2) { - int cycles = is_from_z80 ? z80_cyclesDone() : z80_cycles_from_68k(); - timer_b_next_oflow = (cycles << 8) + timer_b_step; - } - elprintf(EL_YMTIMER, "timer b set to %i, %i", 256 - d, timer_b_next_oflow>>8); + Pico.t.timer_b_step = TIMER_B_TICK_ZCYCLES * (256 - d); + elprintf(EL_YMTIMER, "timer b set to %i, %i", 256 - d, Pico.t.timer_b_next_oflow>>8); } return 0; case 0x27: { /* mode, timer control */ int old_mode = ym2612.OPN.ST.mode; - int cycles = is_from_z80 ? 
z80_cyclesDone() : z80_cycles_from_68k(); - ym2612.OPN.ST.mode = d; elprintf(EL_YMTIMER, "st mode %02x", d); ym2612_sync_timers(cycles, old_mode, d); + ym2612.OPN.ST.mode = d; + /* reset Timer a flag */ if (d & 0x10) ym2612.OPN.ST.status &= ~1; @@ -985,63 +1224,49 @@ static int ym2612_write_local(u32 a, u32 d, int is_from_z80) if ((d ^ old_mode) & 0xc0) { #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); + if (PicoIn.opt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); #endif + PsndDoFM(cycles); return 1; } return 0; } + case 0x2a: { /* DAC data */ + //elprintf(EL_STATUS, "%03i dac w %08x z80 %i", cycles, d, is_from_z80); + if (ym2612.dacen) + PsndDoDAC(cycles); + ym2612.dacout = ((int)d - 0x80) << 6; + return 0; + } case 0x2b: { /* DAC Sel (YM2612) */ - int scanline = get_scanline(is_from_z80); ym2612.dacen = d & 0x80; - if (d & 0x80) PsndDacLine = scanline; #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) YM2612Write_940(a, d, scanline); + if (PicoIn.opt & POPT_EXT_FM) YM2612Write_940(a, d, get_scanline(is_from_z80)); #endif return 0; } } break; - - case 2: /* address port 1 */ - ym2612.OPN.ST.address = d; - ym2612.addr_A1 = 1; -#ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) YM2612Write_940(a, d, -1); -#endif - return 0; - - case 3: /* data port 1 */ - if (ym2612.addr_A1 != 1) - return 0; - - addr = ym2612.OPN.ST.address | 0x100; - ym2612.REGS[addr] = d; - break; } #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) + if (PicoIn.opt & POPT_EXT_FM) return YM2612Write_940(a, d, get_scanline(is_from_z80)); #endif + PsndDoFM(cycles); return YM2612Write_(a, d); } -#define ym2612_read_local() \ - if (xcycles >= timer_a_next_oflow) \ - ym2612.OPN.ST.status |= (ym2612.OPN.ST.mode >> 2) & 1; \ - if (xcycles >= timer_b_next_oflow) \ - ym2612.OPN.ST.status |= (ym2612.OPN.ST.mode >> 2) & 2 - static u32 ym2612_read_local_z80(void) { int xcycles = z80_cyclesDone() << 8; - ym2612_read_local(); + 
ym2612_update_status(xcycles); - elprintf(EL_YMTIMER, "timer z80 read %i, sched %i, %i @ %i|%i", ym2612.OPN.ST.status, - timer_a_next_oflow>>8, timer_b_next_oflow>>8, xcycles >> 8, (xcycles >> 8) / 228); + elprintf(EL_YMTIMER, "timer z80 read %i, sched %i, %i @ %i|%i", + ym2612.OPN.ST.status, Pico.t.timer_a_next_oflow >> 8, + Pico.t.timer_b_next_oflow >> 8, xcycles >> 8, (xcycles >> 8) / 228); return ym2612.OPN.ST.status; } @@ -1049,37 +1274,42 @@ static u32 ym2612_read_local_68k(void) { int xcycles = z80_cycles_from_68k() << 8; - ym2612_read_local(); + ym2612_update_status(xcycles); - elprintf(EL_YMTIMER, "timer 68k read %i, sched %i, %i @ %i|%i", ym2612.OPN.ST.status, - timer_a_next_oflow>>8, timer_b_next_oflow>>8, xcycles >> 8, (xcycles >> 8) / 228); + elprintf(EL_YMTIMER, "timer 68k read %i, sched %i, %i @ %i|%i", + ym2612.OPN.ST.status, Pico.t.timer_a_next_oflow >> 8, + Pico.t.timer_b_next_oflow >> 8, xcycles >> 8, (xcycles >> 8) / 228); return ym2612.OPN.ST.status; } void ym2612_pack_state(void) { // timers are saved as tick counts, in 16.16 int format - int tac, tat = 0, tbc, tbt = 0; + int tac, tat = 0, tbc, tbt = 0, busy = 0; tac = 1024 - ym2612.OPN.ST.TA; tbc = 256 - ym2612.OPN.ST.TB; - if (timer_a_next_oflow != TIMER_NO_OFLOW) - tat = (int)((double)(timer_a_step - timer_a_next_oflow) / (double)timer_a_step * tac * 65536); - if (timer_b_next_oflow != TIMER_NO_OFLOW) - tbt = (int)((double)(timer_b_step - timer_b_next_oflow) / (double)timer_b_step * tbc * 65536); + if (Pico.t.ym2612_busy > 0) + busy = cycles_z80_to_68k(Pico.t.ym2612_busy); + if (Pico.t.timer_a_next_oflow != TIMER_NO_OFLOW) + tat = (int)((double)(Pico.t.timer_a_step - Pico.t.timer_a_next_oflow) + / (double)Pico.t.timer_a_step * tac * 65536); + if (Pico.t.timer_b_next_oflow != TIMER_NO_OFLOW) + tbt = (int)((double)(Pico.t.timer_b_step - Pico.t.timer_b_next_oflow) + / (double)Pico.t.timer_b_step * tbc * 65536); elprintf(EL_YMTIMER, "save: timer a %i/%i", tat >> 16, tac); elprintf(EL_YMTIMER, 
"save: timer b %i/%i", tbt >> 16, tbc); #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) + if (PicoIn.opt & POPT_EXT_FM) YM2612PicoStateSave2_940(tat, tbt); else #endif - YM2612PicoStateSave2(tat, tbt); + YM2612PicoStateSave2(tat, tbt, busy); } void ym2612_unpack_state(void) { - int i, ret, tac, tat, tbc, tbt; + int i, ret, tac, tat, tbc, tbt, busy = 0; YM2612PicoStateLoad(); // feed all the registers and update internal state @@ -1105,28 +1335,29 @@ void ym2612_unpack_state(void) } #ifdef __GP2X__ - if (PicoOpt & POPT_EXT_FM) + if (PicoIn.opt & POPT_EXT_FM) ret = YM2612PicoStateLoad2_940(&tat, &tbt); else #endif - ret = YM2612PicoStateLoad2(&tat, &tbt); + ret = YM2612PicoStateLoad2(&tat, &tbt, &busy); if (ret != 0) { elprintf(EL_STATUS, "old ym2612 state"); return; // no saved timers } + Pico.t.ym2612_busy = cycles_68k_to_z80(busy); tac = (1024 - ym2612.OPN.ST.TA) << 16; tbc = (256 - ym2612.OPN.ST.TB) << 16; if (ym2612.OPN.ST.mode & 1) - timer_a_next_oflow = (int)((double)(tac - tat) / (double)tac * timer_a_step); + Pico.t.timer_a_next_oflow = (int)((double)(tac - tat) / (double)tac * Pico.t.timer_a_step); else - timer_a_next_oflow = TIMER_NO_OFLOW; + Pico.t.timer_a_next_oflow = TIMER_NO_OFLOW; if (ym2612.OPN.ST.mode & 2) - timer_b_next_oflow = (int)((double)(tbc - tbt) / (double)tbc * timer_b_step); + Pico.t.timer_b_next_oflow = (int)((double)(tbc - tbt) / (double)tbc * Pico.t.timer_b_step); else - timer_b_next_oflow = TIMER_NO_OFLOW; - elprintf(EL_YMTIMER, "load: %i/%i, timer_a_next_oflow %i", tat>>16, tac>>16, timer_a_next_oflow >> 8); - elprintf(EL_YMTIMER, "load: %i/%i, timer_b_next_oflow %i", tbt>>16, tbc>>16, timer_b_next_oflow >> 8); + Pico.t.timer_b_next_oflow = TIMER_NO_OFLOW; + elprintf(EL_YMTIMER, "load: %i/%i, timer_a_next_oflow %i", tat>>16, tac>>16, Pico.t.timer_a_next_oflow >> 8); + elprintf(EL_YMTIMER, "load: %i/%i, timer_b_next_oflow %i", tbt>>16, tbc>>16, Pico.t.timer_b_next_oflow >> 8); } #if defined(NO_32X) && defined(_ASM_MEMORY_C) @@ -1140,9 
+1371,39 @@ void PicoWrite16_32x(u32 a, u32 d) {} // ----------------------------------------------------------------- // z80 memhandlers +static void access_68k_bus(int delay) // bus delay as Q8 +{ + // TODO: if the 68K is in DMA wait, Z80 has to wait until DMA ends + + // 68k bus access delay for z80. The fractional part needs to be accumulated + // until an additional cycle is full. That is then added to the integer part. + Pico.t.z80_busdelay += (delay&0xff); // accumulate + z80_subCLeft((delay>>8) + (Pico.t.z80_busdelay>>8)); + Pico.t.z80_busdelay &= 0xff; // leftover cycle fraction + // don't use SekCyclesBurn() here since the Z80 doesn't run in cycle lock to + // the 68K. Count the stolen cycles to be accounted later in the 68k CPU runs + Pico.t.z80_buscycles += 8; // TODO <=8.4 for Rick 2, but >=8.9 for misc_test +} + static unsigned char z80_md_vdp_read(unsigned short a) { - // TODO? + if ((a & 0xff00) == 0x7f00) { + // 68k bus access delay=3.3 per kabuto, for notaz picotest 2.422.42) - 0x292(<2.57) + + switch (a & 0x0d) + { + case 0x00: return PicoVideoRead8DataH(1); + case 0x01: return PicoVideoRead8DataL(1); + case 0x04: return PicoVideoRead8CtlH(1); + case 0x05: return PicoVideoRead8CtlL(1); + case 0x08: + case 0x0c: return PicoVideoGetV(get_scanline(1), 1); + case 0x09: + case 0x0d: return Pico.m.rotate++; + } + } + elprintf(EL_ANOMALY, "z80 invalid r8 [%06x] %02x", a, 0xff); return 0xff; } @@ -1150,12 +1411,16 @@ static unsigned char z80_md_vdp_read(unsigned short a) static unsigned char z80_md_bank_read(unsigned short a) { unsigned int addr68k; - unsigned char ret; + unsigned char ret = 0xff; - addr68k = Pico.m.z80_bank68k<<15; - addr68k += a & 0x7fff; + // 68k bus access delay=3.3 per kabuto, but for notaz picotest 3.023.02)-0x351(<3.32) - ret = m68k_read8(addr68k); + addr68k = Pico.m.z80_bank68k << 15; + addr68k |= a & 0x7fff; + + if (addr68k < 0xe00000) // can't read from 68K RAM + ret = m68k_read8(addr68k); elprintf(EL_Z80BNK, "z80->68k r8 
[%06x] %02x", addr68k, ret); return ret; @@ -1163,19 +1428,18 @@ static unsigned char z80_md_bank_read(unsigned short a) static void z80_md_ym2612_write(unsigned int a, unsigned char data) { - if (PicoOpt & POPT_EN_FM) - emustatus |= ym2612_write_local(a, data, 1) & 1; + if (PicoIn.opt & POPT_EN_FM) + ym2612_write_local(a, data, 1); } static void z80_md_vdp_br_write(unsigned int a, unsigned char data) { - // TODO: allow full VDP access if ((a&0xfff9) == 0x7f11) // 7f11 7f13 7f15 7f17 { - if (PicoOpt & POPT_EN_PSG) - SN76496Write(data); + psg_write_z80(data); return; } + // at least VDP data writes hang my machine if ((a>>8) == 0x60) { @@ -1192,6 +1456,9 @@ static void z80_md_bank_write(unsigned int a, unsigned char data) { unsigned int addr68k; + // 68k bus access delay=3.3 per kabuto, but for notaz picotest 3.023.02)-0x351(<3.32) + addr68k = Pico.m.z80_bank68k << 15; addr68k += a & 0x7fff; @@ -1214,14 +1481,14 @@ static void z80_md_out(unsigned short p, unsigned char d) static void z80_mem_setup(void) { - z80_map_set(z80_read_map, 0x0000, 0x1fff, Pico.zram, 0); - z80_map_set(z80_read_map, 0x2000, 0x3fff, Pico.zram, 0); + z80_map_set(z80_read_map, 0x0000, 0x1fff, PicoMem.zram, 0); + z80_map_set(z80_read_map, 0x2000, 0x3fff, PicoMem.zram, 0); z80_map_set(z80_read_map, 0x4000, 0x5fff, ym2612_read_local_z80, 1); z80_map_set(z80_read_map, 0x6000, 0x7fff, z80_md_vdp_read, 1); z80_map_set(z80_read_map, 0x8000, 0xffff, z80_md_bank_read, 1); - z80_map_set(z80_write_map, 0x0000, 0x1fff, Pico.zram, 0); - z80_map_set(z80_write_map, 0x2000, 0x3fff, Pico.zram, 0); + z80_map_set(z80_write_map, 0x0000, 0x1fff, PicoMem.zram, 0); + z80_map_set(z80_write_map, 0x2000, 0x3fff, PicoMem.zram, 0); z80_map_set(z80_write_map, 0x4000, 0x5fff, z80_md_ym2612_write, 1); z80_map_set(z80_write_map, 0x6000, 0x7fff, z80_md_vdp_br_write, 1); z80_map_set(z80_write_map, 0x8000, 0xffff, z80_md_bank_write, 1); @@ -1231,8 +1498,6 @@ static void z80_mem_setup(void) drZ80.z80_out = z80_md_out; #endif 
#ifdef _USE_CZ80 - Cz80_Set_Fetch(&CZ80, 0x0000, 0x1fff, (FPTR)Pico.zram); // main RAM - Cz80_Set_Fetch(&CZ80, 0x2000, 0x3fff, (FPTR)Pico.zram); // mirror Cz80_Set_INPort(&CZ80, z80_md_in); Cz80_Set_OUTPort(&CZ80, z80_md_out); #endif diff --git a/pico/memory.h b/pico/memory.h index afca0826..27e2940f 100644 --- a/pico/memory.h +++ b/pico/memory.h @@ -1,9 +1,6 @@ // memory map related stuff -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -typedef unsigned long uptr; // unsigned pointer-sized int +#include "pico_port.h" #define M68K_MEM_SHIFT 16 // minimum size we can map @@ -28,22 +25,34 @@ typedef void (cpu68k_write_f)(u32 a, u32 d); extern u32 m68k_read8(u32 a); extern u32 m68k_read16(u32 a); +extern u32 m68k_read32(u32 a); extern void m68k_write8(u32 a, u8 d); extern void m68k_write16(u32 a, u16 d); +extern void m68k_write32(u32 a, u32 d); + +extern u32 s68k_read8(u32 a); +extern u32 s68k_read16(u32 a); +extern u32 s68k_read32(u32 a); +extern void s68k_write8(u32 a, u8 d); +extern void s68k_write16(u32 a, u16 d); +extern void s68k_write32(u32 a, u32 d); // z80 -#define Z80_MEM_SHIFT 13 +#define Z80_MEM_SHIFT 10 // must be <=10 to allow 1KB pages for SMS Sega mapper extern uptr z80_read_map [0x10000 >> Z80_MEM_SHIFT]; extern uptr z80_write_map[0x10000 >> Z80_MEM_SHIFT]; typedef unsigned char (z80_read_f)(unsigned short a); typedef void (z80_write_f)(unsigned int a, unsigned char data); -void z80_map_set(uptr *map, int start_addr, int end_addr, +void z80_map_set(uptr *map, u16 start_addr, u16 end_addr, const void *func_or_mh, int is_func); -void cpu68k_map_set(uptr *map, int start_addr, int end_addr, +void cpu68k_map_set(uptr *map, u32 start_addr, u32 end_addr, const void *func_or_mh, int is_func); -void cpu68k_map_all_ram(int start_addr, int end_addr, void *ptr, int is_sub); -void m68k_map_unmap(int start_addr, int end_addr); +void cpu68k_map_read_mem(u32 start_addr, u32 end_addr, void *ptr, int is_sub); +void 
cpu68k_map_all_ram(u32 start_addr, u32 end_addr, void *ptr, int is_sub); +void cpu68k_map_read_funcs(u32 start_addr, u32 end_addr, u32 (*r8)(u32), u32 (*r16)(u32), int is_sub); +void cpu68k_map_all_funcs(u32 start_addr, u32 end_addr, u32 (*r8)(u32), u32 (*r16)(u32), void (*w8)(u32, u32), void (*w16)(u32, u32), int is_sub); +void m68k_map_unmap(u32 start_addr, u32 end_addr); #define MAP_FLAG ((uptr)1 << (sizeof(uptr) * 8 - 1)) #define map_flag_set(x) ((x) & MAP_FLAG) @@ -57,7 +66,7 @@ u32 name(u32 a) \ if (map_flag_set(v)) \ return ((cpu68k_read_f *)(v << 1))(a); \ else \ - return *(u8 *)((v << 1) + (a ^ 1)); \ + return *(u8 *)((v << 1) + MEM_BE2(a)); \ } #define MAKE_68K_READ16(name, map) \ @@ -100,7 +109,7 @@ void name(u32 a, u8 d) \ if (map_flag_set(v)) \ ((cpu68k_write_f *)(v << 1))(a, d); \ else \ - *(u8 *)((v << 1) + (a ^ 1)) = d; \ + *(u8 *)((v << 1) + MEM_BE2(a)) = d; \ } #define MAKE_68K_WRITE16(name, map) \ @@ -133,6 +142,25 @@ void name(u32 a, u32 d) \ } \ } +#ifdef NEED_DMA_SOURCE // meh + +static __inline void *m68k_dma_source(u32 a) +{ + u8 *base; + uptr v; + v = m68k_read16_map[a >> M68K_MEM_SHIFT]; + if (map_flag_set(v)) { + if (a >= Pico.romsize) // Rom + return NULL; + base = Pico.rom; + } + else + base = (void *)(v << 1); + return base + (a & 0xfe0000); +} + +#endif + // 32x typedef struct { uptr addr; // stores (membase >> 1) or ((handler >> 1) | (1<<31)) diff --git a/pico/memory_amips.s b/pico/memory_amips.S similarity index 93% rename from pico/memory_amips.s rename to pico/memory_amips.S index 4f09198f..7932c2c9 100644 --- a/pico/memory_amips.s +++ b/pico/memory_amips.S @@ -8,6 +8,8 @@ # OUT OF DATE +#include "pico_int_offs.h" + .set noreorder .set noat @@ -184,8 +186,8 @@ m_read32_table: PicoMemReset: - lui $v1, %hi(Pico+0x22204) - lw $v1, %lo(Pico+0x22204)($v1) # romsize + lui $v1, %hi(Pico+OFS_Pico_romsize) + lw $v1, %lo(Pico+OFS_Pico_romsize)($v1) # romsize lui $t0, 8 addu $v1, $t0 addiu $v1, -1 @@ -235,12 +237,11 @@ m_read_neg1: jr $ra 
addiu $v0, $0, 0xffff -# loads &Pico.rom to $t3 +# loads &Pico to $t3 .macro m_read_rom_try_sram is200000 size - lui $t2, %hi(SRam) - addiu $t2, %lo(SRam) - lui $t3, %hi(Pico+0x22200) - lw $t1, 8($t2) # SRam.end + lui $t2, %hi(Pico) + addiu $t2, %lo(Pico) + lw $t1, OFS_Pico_sv_end($t2) .if \is200000 ins $a0, $0, 19, 13 lui $t4, 0x20 @@ -248,12 +249,11 @@ m_read_neg1: .endif subu $t4, $a0, $t1 bgtz $t4, 1f - addiu $t3, %lo(Pico+0x22200) - lw $t1, 4($t2) # SRam.start + lw $t1, OFS_Pico_sv_start($t2) subu $t4, $t1, $a0 bgtz $t4, 1f nop - lb $t1, 0x11($t3) # Pico.m.sram_reg + lb $t1, OFS_Pico_m_sram_reg($t2) andi $t4, $t1, 5 beqz $t4, 1f nop @@ -288,8 +288,8 @@ m_read_neg1: .endm .macro m_read8_rom sect - lui $t0, %hi(Pico+0x22200) - lw $t0, %lo(Pico+0x22200)($t0) # rom + lui $t0, %hi(Pico+OFS_Pico_rom) + lw $t0, %lo(Pico+OFS_Pico_rom)($t0) # rom xori $a0, 1 ins $a0, $0, 19, 13 .if \sect @@ -388,15 +388,15 @@ m_read8_misc_io: nop m_read8_misc_hwreg: - lui $v0, %hi(Pico+0x2220f) + lui $v0, %hi(Pico+OFS_Pico_m_hardware) jr $ra - lb $v0, %lo(Pico+0x2220f)($v0) + lb $v0, %lo(Pico+OFS_Pico_m_hardware)($v0) m_read8_misc_ioports: - lui $v0, %hi(Pico+0x22000) + lui $v0, %hi(PicoMem+0x22000) ins $v0, $t0, 0, 5 jr $ra - lb $v0, %lo(Pico+0x22000)($v0) + lb $v0, %lo(PicoMem+0x22000)($v0) m_read8_misc2: lui $t0, 0xa1 @@ -423,10 +423,10 @@ m_read8_z80_misc: nop m_read8_fake_ym2612: - lb $v0, %lo(Pico+0x22208)($t0) # Pico.m.rotate + lb $v0, %lo(Pico+OFS_Pico_m_rotate)($t0) addiu $t1, $v0, 1 jr $ra - sb $t1, %lo(Pico+0x22208)($t0) + sb $t1, %lo(Pico+OFS_Pico_m_rotate)($t0) # delay slot friendly .macro m_read8_call16 funcname is_func_ptr=0 @@ -468,15 +468,15 @@ m_read8_vdp: or $t0, $t1 bnez $t0, m_read_null # invalid address nop - j PicoVideoRead8 + j PicoRead8_vdp nop m_read8_ram: - lui $t0, %hi(Pico) + lui $t0, %hi(PicoMem) ins $t0, $a0, 0, 16 xori $t0, 1 jr $ra - lb $v0, %lo(Pico)($t0) + lb $v0, %lo(PicoMem)($t0) m_read8_above_rom: # might still be SRam (Micro Machines, HardBall 
'95) @@ -486,8 +486,8 @@ m_read8_above_rom: # ############################################################################# .macro m_read16_rom sect - lui $t0, %hi(Pico+0x22200) - lw $t0, %lo(Pico+0x22200)($t0) # rom + lui $t0, %hi(Pico+OFS_Pico_rom) + lw $t0, %lo(Pico+OFS_Pico_rom)($t0) # rom ins $a0, $0, 0, 1 ins $a0, $0, 19, 13 .if \sect @@ -583,11 +583,11 @@ m_read16_vdp: nop m_read16_ram: - lui $t0, %hi(Pico) + lui $t0, %hi(PicoMem) ins $a0, $0, 0, 1 ins $t0, $a0, 0, 16 jr $ra - lh $v0, %lo(Pico)($t0) + lh $v0, %lo(PicoMem)($t0) m_read16_above_rom: # might still be SRam @@ -600,8 +600,8 @@ m_read16_above_rom: # ############################################################################# .macro m_read32_rom sect - lui $t0, %hi(Pico+0x22200) - lw $t0, %lo(Pico+0x22200)($t0) # rom + lui $t0, %hi(Pico+OFS_Pico_rom) + lw $t0, %lo(Pico+OFS_Pico_rom)($t0) # rom ins $a0, $0, 0, 1 ins $a0, $0, 19, 13 .if \sect @@ -723,11 +723,11 @@ m_read32_vdp: m_read32_call16 PicoVideoRead m_read32_ram: - lui $t0, %hi(Pico) + lui $t0, %hi(PicoMem) ins $a0, $0, 0, 1 ins $t0, $a0, 0, 16 - lh $v1, %lo(Pico)($t0) - lh $v0, %lo(Pico+2)($t0) + lh $v1, %lo(PicoMem)($t0) + lh $v0, %lo(PicoMem+2)($t0) jr $ra ins $v0, $v1, 16, 16 @@ -771,11 +771,11 @@ PicoWriteRomHW_SSF2: # u32 a, u32 d bnez $a0, pwr_banking # sram register - lui $t0, %hi(Pico+0x22211) - lb $t1, %lo(Pico+0x22211)($t0) # Pico.m.sram_reg + lui $t0, %hi(Pico+OFS_Pico_m_sram_reg) + lb $t1, %lo(Pico+OFS_Pico_m_sram_reg)($t0) # Pico.m.sram_reg ins $t1, $a1, 0, 2 jr $ra - sb $t1, %lo(Pico+0x22211)($t0) + sb $t1, %lo(Pico+OFS_Pico_m_sram_reg)($t0) pwr_banking: andi $a1, 0x1f diff --git a/pico/memory_arm.s b/pico/memory_arm.S similarity index 52% rename from pico/memory_arm.s rename to pico/memory_arm.S index f6d7f79d..568be822 100644 --- a/pico/memory_arm.s +++ b/pico/memory_arm.S @@ -1,11 +1,15 @@ /* * PicoDrive * (C) notaz, 2006-2009 + * (C) irixxxx, 2019,2020 * * This work is licensed under the terms of MAME license. 
* See COPYING file in the top-level directory. */ +#include "arm_features.h" +#include "pico_int_offs.h" + .equ SRR_MAPPED, (1 << 0) .equ SRR_READONLY, (1 << 1) .equ SRF_EEPROM, (1 << 1) @@ -21,35 +25,34 @@ .global PicoWrite8_io .global PicoWrite16_io -PicoRead8_sram: @ u32 a, u32 d - ldr r2, =(SRam) - ldr r3, =(Pico+0x22200) - ldr r1, [r2, #8] @ SRam.end + PIC_LDR_INIT() + +PicoRead8_sram: @ u32 a + PIC_LDR(r3, r1, Pico) + ldr r1, [r3, #OFS_Pico_sv_end] cmp r0, r1 bgt m_read8_nosram - ldr r1, [r2, #4] @ SRam.start - cmp r0, r1 + ldr r2, [r3, #OFS_Pico_sv_start] + cmp r0, r2 blt m_read8_nosram - ldrb r1, [r3, #0x11] @ Pico.m.sram_reg + ldrb r1, [r3, #OFS_Pico_m_sram_reg] tst r1, #SRR_MAPPED beq m_read8_nosram - ldr r1, [r2, #0x0c] + ldr r1, [r3, #OFS_Pico_sv_flags] tst r1, #SRF_EEPROM bne m_read8_eeprom - ldr r1, [r2, #4] @ SRam.start - ldr r2, [r2] @ SRam.data - sub r0, r0, r1 - add r0, r0, r2 - ldrb r0, [r0] + ldr r1, [r3, #OFS_Pico_sv_data] + sub r0, r0, r2 + ldrb r0, [r0, r1] bx lr m_read8_nosram: - ldr r1, [r3, #4] @ romsize + ldr r1, [r3, #OFS_Pico_romsize] cmp r0, r1 movgt r0, #0 bxgt lr @ bad location @ XXX: banking unfriendly - ldr r1, [r3] + ldr r1, [r3, #OFS_Pico_rom] eor r0, r0, #1 ldrb r0, [r1, r0] bx lr @@ -60,10 +63,11 @@ m_read8_eeprom: ldmfd sp!,{r1,lr} tst r1, #1 moveq r0, r0, lsr #8 + and r0, r0, #0xff bx lr -PicoRead8_io: @ u32 a, u32 d +PicoRead8_io: @ u32 a bic r2, r0, #0x001f @ most commonly we get i/o port read, cmp r2, #0xa10000 @ so check for it first beq io_ports_read @@ -71,13 +75,13 @@ PicoRead8_io: @ u32 a, u32 d m_read8_not_io: and r2, r0, #0xfc00 cmp r2, #0x1000 - bne m_read8_not_brq + bne PicoRead8_32x - ldr r3, =(Pico+0x22200) + PIC_LDR(r3, r1, Pico) mov r1, r0 - ldr r0, [r3, #8] @ Pico.m.rotate + ldr r0, [r3, #OFS_Pico_m_rotate] add r0, r0, #1 - strb r0, [r3, #8] + strb r0, [r3, #OFS_Pico_m_rotate] eor r0, r0, r0, lsl #6 tst r1, #1 @@ -87,53 +91,42 @@ m_read8_not_io: cmp r2, #0x1100 bxne lr @ not busreq - ldrb r1, [r3, #(8+0x01)] 
@ Pico.m.z80Run - ldrb r2, [r3, #(8+0x0f)] @ Pico.m.z80_reset + ldrb r1, [r3, #OFS_Pico_m_z80Run] + ldrb r2, [r3, #OFS_Pico_m_z80_reset] orr r0, r0, r1 orr r0, r0, r2 bx lr -m_read8_not_brq: - ldr r2, =PicoOpt - ldr r2, [r2] - tst r2, #POPT_EN_32X - bne PicoRead8_32x - mov r0, #0 - bx lr - @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ PicoRead16_sram: @ u32 a, u32 d - ldr r2, =(SRam) - ldr r3, =(Pico+0x22200) - ldr r1, [r2, #8] @ SRam.end + PIC_LDR(r3, r1, Pico) + ldr r1, [r3, #OFS_Pico_sv_end] cmp r0, r1 bgt m_read16_nosram - ldr r1, [r2, #4] @ SRam.start - cmp r0, r1 + ldr r2, [r3, #OFS_Pico_sv_start] + cmp r0, r2 blt m_read16_nosram - ldrb r1, [r3, #0x11] @ Pico.m.sram_reg + ldrb r1, [r3, #OFS_Pico_m_sram_reg] tst r1, #SRR_MAPPED beq m_read16_nosram - ldr r1, [r2, #0x0c] + ldr r1, [r3, #OFS_Pico_sv_flags] tst r1, #SRF_EEPROM bne EEPROM_read - ldr r1, [r2, #4] @ SRam.start - ldr r2, [r2] @ SRam.data - sub r0, r0, r1 - add r0, r0, r2 - ldrb r1, [r0], #1 - ldrb r0, [r0] + ldr r1, [r3, #OFS_Pico_sv_data] + sub r0, r0, r2 + ldrb r1, [r0, r1]! 
+ ldrb r0, [r0, #1] orr r0, r0, r1, lsl #8 bx lr m_read16_nosram: - ldr r1, [r3, #4] @ romsize + ldr r1, [r3, #OFS_Pico_romsize] cmp r0, r1 movgt r0, #0 bxgt lr @ bad location @ XXX: banking unfriendly - ldr r1, [r3] + ldr r1, [r3, #OFS_Pico_rom] ldrh r0, [r1, r0] bx lr @@ -150,33 +143,25 @@ PicoRead16_io: @ u32 a, u32 d m_read16_not_io: and r2, r0, #0xfc00 cmp r2, #0x1000 - bne m_read16_not_brq + bne PicoRead16_32x - ldr r3, =(Pico+0x22200) + PIC_LDR(r3, r2, Pico) and r2, r0, #0xff00 - ldr r0, [r3, #8] @ Pico.m.rotate + ldr r0, [r3, #OFS_Pico_m_rotate] add r0, r0, #1 - strb r0, [r3, #8] + strb r0, [r3, #OFS_Pico_m_rotate] eor r0, r0, r0, lsl #5 eor r0, r0, r0, lsl #8 bic r0, r0, #0x100 @ bit8 defined in this area cmp r2, #0x1100 bxne lr @ not busreq - ldrb r1, [r3, #(8+0x01)] @ Pico.m.z80Run - ldrb r2, [r3, #(8+0x0f)] @ Pico.m.z80_reset + ldrb r1, [r3, #OFS_Pico_m_z80Run] + ldrb r2, [r3, #OFS_Pico_m_z80_reset] orr r0, r0, r1, lsl #8 orr r0, r0, r2, lsl #8 bx lr -m_read16_not_brq: - ldr r2, =PicoOpt - ldr r2, [r2] - tst r2, #POPT_EN_32X - bne PicoRead16_32x - mov r0, #0 - bx lr - @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ PicoWrite8_io: @ u32 a, u32 d @@ -201,20 +186,13 @@ m_write8_not_z80ctl: eor r2, r0, #0xa10000 eor r2, r2, #0x003000 eors r2, r2, #0x0000f1 - bne m_write8_not_sreg - ldr r3, =(Pico+0x22200) - ldrb r2, [r3, #(8+9)] @ Pico.m.sram_reg + bne PicoWrite8_32x + PIC_LDR(r3, r2, Pico) + ldrb r2, [r3, #OFS_Pico_m_sram_reg] and r1, r1, #(SRR_MAPPED|SRR_READONLY) bic r2, r2, #(SRR_MAPPED|SRR_READONLY) orr r2, r2, r1 - strb r2, [r3, #(8+9)] - bx lr - -m_write8_not_sreg: - ldr r2, =PicoOpt - ldr r2, [r2] - tst r2, #POPT_EN_32X - bne PicoWrite8_32x + strb r2, [r3, #OFS_Pico_m_sram_reg] bx lr @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -238,22 +216,112 @@ m_write16_not_z80ctl: eor r2, r0, #0xa10000 eor r2, r2, #0x003000 eors r2, r2, #0x0000f0 - bne m_write16_not_sreg - ldr r3, 
=(Pico+0x22200) - ldrb r2, [r3, #(8+9)] @ Pico.m.sram_reg + bne PicoWrite16_32x + PIC_LDR(r3, r2, Pico) + ldrb r2, [r3, #OFS_Pico_m_sram_reg] and r1, r1, #(SRR_MAPPED|SRR_READONLY) bic r2, r2, #(SRR_MAPPED|SRR_READONLY) orr r2, r2, r1 - strb r2, [r3, #(8+9)] + strb r2, [r3, #OFS_Pico_m_sram_reg] bx lr -m_write16_not_sreg: - ldr r2, =PicoOpt - ldr r2, [r2] - tst r2, #POPT_EN_32X - bne PicoWrite16_32x +.global m68k_read8 +.global m68k_read16 +.global m68k_read32 +.global m68k_write8 +.global m68k_write16 +.global m68k_write32 + +m68k_read8: + PIC_LDR(r3, r2, m68k_read8_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + eor r2, r0, #1 + movs r3, r3, lsl #1 + ldrccb r0, [r3, r2] + bxcc lr + bx r3 + +m68k_read16: + PIC_LDR(r3, r2, m68k_read16_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + ldrcch r0, [r3, r0] + bxcc lr + bx r3 + +m68k_read32: + PIC_LDR(r3, r2, m68k_read16_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + ldrcch r1, [r3, r0]! 
+ ldrcch r0, [r3, #2] + orrcc r0, r0, r1, lsl #16 + bxcc lr + + stmfd sp!, {r0, r3, r4, lr} + mov lr, pc + bx r3 + ldmfd sp!, {r1, r3} + str r0, [sp] + add r0, r1, #2 + mov lr, pc + bx r3 + ldmfd sp!, {r1, lr} + mov r0, r0, lsl #16 + mov r1, r1, lsl #16 + orr r0, r1, r0, lsr #16 bx lr +m68k_write8: + PIC_LDR(r3, r2, m68k_write8_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + eor r2, r0, #1 + movs r3, r3, lsl #1 + strccb r1, [r3, r2] + bxcc lr + bx r3 + +m68k_write16: + PIC_LDR(r3, r2, m68k_write16_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + strcch r1, [r3, r0] + bxcc lr + bx r3 + +m68k_write32: + PIC_LDR(r3, r2, m68k_write16_map) + bic r0, r0, #0xff000000 + mov r2, r0, lsr #16 + ldr r3, [r3, r2, lsl #2] + bic r0, r0, #1 + movs r3, r3, lsl #1 + movcc r2, r1, lsr #16 + strcch r2, [r3, r0]! + strcch r1, [r3, #2] + bxcc lr + + stmfd sp!, {r0, r1, r3, lr} + mov r1, r1, lsr #16 + mov lr, pc + bx r3 + ldmfd sp!, {r0, r1, r3, lr} + add r0, r0, #2 + bx r3 + .pool @ vim:filetype=armasm diff --git a/pico/misc.c b/pico/misc.c index f9e85cda..9065476b 100644 --- a/pico/misc.c +++ b/pico/misc.c @@ -8,114 +8,8 @@ #include "pico_int.h" -// H-counter table for hvcounter reads in 40col mode -// based on Gens code -const unsigned char hcounts_40[] = { -0x07,0x07,0x08,0x08,0x08,0x09,0x09,0x0a,0x0a,0x0b,0x0b,0x0b,0x0c,0x0c,0x0d,0x0d, -0x0e,0x0e,0x0e,0x0f,0x0f,0x10,0x10,0x10,0x11,0x11,0x12,0x12,0x13,0x13,0x13,0x14, -0x14,0x15,0x15,0x15,0x16,0x16,0x17,0x17,0x18,0x18,0x18,0x19,0x19,0x1a,0x1a,0x1b, -0x1b,0x1b,0x1c,0x1c,0x1d,0x1d,0x1d,0x1e,0x1e,0x1f,0x1f,0x20,0x20,0x20,0x21,0x21, -0x22,0x22,0x23,0x23,0x23,0x24,0x24,0x25,0x25,0x25,0x26,0x26,0x27,0x27,0x28,0x28, -0x28,0x29,0x29,0x2a,0x2a,0x2a,0x2b,0x2b,0x2c,0x2c,0x2d,0x2d,0x2d,0x2e,0x2e,0x2f, -0x2f,0x30,0x30,0x30,0x31,0x31,0x32,0x32,0x32,0x33,0x33,0x34,0x34,0x35,0x35,0x35, 
-0x36,0x36,0x37,0x37,0x38,0x38,0x38,0x39,0x39,0x3a,0x3a,0x3a,0x3b,0x3b,0x3c,0x3c, -0x3d,0x3d,0x3d,0x3e,0x3e,0x3f,0x3f,0x3f,0x40,0x40,0x41,0x41,0x42,0x42,0x42,0x43, -0x43,0x44,0x44,0x45,0x45,0x45,0x46,0x46,0x47,0x47,0x47,0x48,0x48,0x49,0x49,0x4a, -0x4a,0x4a,0x4b,0x4b,0x4c,0x4c,0x4d,0x4d,0x4d,0x4e,0x4e,0x4f,0x4f,0x4f,0x50,0x50, -0x51,0x51,0x52,0x52,0x52,0x53,0x53,0x54,0x54,0x55,0x55,0x55,0x56,0x56,0x57,0x57, -0x57,0x58,0x58,0x59,0x59,0x5a,0x5a,0x5a,0x5b,0x5b,0x5c,0x5c,0x5c,0x5d,0x5d,0x5e, -0x5e,0x5f,0x5f,0x5f,0x60,0x60,0x61,0x61,0x62,0x62,0x62,0x63,0x63,0x64,0x64,0x64, -0x65,0x65,0x66,0x66,0x67,0x67,0x67,0x68,0x68,0x69,0x69,0x6a,0x6a,0x6a,0x6b,0x6b, -0x6c,0x6c,0x6c,0x6d,0x6d,0x6e,0x6e,0x6f,0x6f,0x6f,0x70,0x70,0x71,0x71,0x71,0x72, -0x72,0x73,0x73,0x74,0x74,0x74,0x75,0x75,0x76,0x76,0x77,0x77,0x77,0x78,0x78,0x79, -0x79,0x79,0x7a,0x7a,0x7b,0x7b,0x7c,0x7c,0x7c,0x7d,0x7d,0x7e,0x7e,0x7f,0x7f,0x7f, -0x80,0x80,0x81,0x81,0x81,0x82,0x82,0x83,0x83,0x84,0x84,0x84,0x85,0x85,0x86,0x86, -0x86,0x87,0x87,0x88,0x88,0x89,0x89,0x89,0x8a,0x8a,0x8b,0x8b,0x8c,0x8c,0x8c,0x8d, -0x8d,0x8e,0x8e,0x8e,0x8f,0x8f,0x90,0x90,0x91,0x91,0x91,0x92,0x92,0x93,0x93,0x94, -0x94,0x94,0x95,0x95,0x96,0x96,0x96,0x97,0x97,0x98,0x98,0x99,0x99,0x99,0x9a,0x9a, -0x9b,0x9b,0x9b,0x9c,0x9c,0x9d,0x9d,0x9e,0x9e,0x9e,0x9f,0x9f,0xa0,0xa0,0xa1,0xa1, -0xa1,0xa2,0xa2,0xa3,0xa3,0xa3,0xa4,0xa4,0xa5,0xa5,0xa6,0xa6,0xa6,0xa7,0xa7,0xa8, -0xa8,0xa9,0xa9,0xa9,0xaa,0xaa,0xab,0xab,0xab,0xac,0xac,0xad,0xad,0xae,0xae,0xae, -0xaf,0xaf,0xb0,0xb0, -0xe4,0xe4,0xe4,0xe5,0xe5,0xe6,0xe6,0xe6,0xe7,0xe7,0xe8,0xe8,0xe9,0xe9,0xe9,0xea, -0xea,0xeb,0xeb,0xeb,0xec,0xec,0xed,0xed,0xee,0xee,0xee,0xef,0xef,0xf0,0xf0,0xf1, -0xf1,0xf1,0xf2,0xf2,0xf3,0xf3,0xf3,0xf4,0xf4,0xf5,0xf5,0xf6,0xf6,0xf6,0xf7,0xf7, -0xf8,0xf8,0xf9,0xf9,0xf9,0xfa,0xfa,0xfb,0xfb,0xfb,0xfc,0xfc,0xfd,0xfd,0xfe,0xfe, -0xfe,0xff,0xff,0x00,0x00,0x00,0x01,0x01,0x02,0x02,0x03,0x03,0x03,0x04,0x04,0x05, -0x05,0x06,0x06,0x06, 
-0x07,0x07,0x08,0x08,0x08,0x09,0x09,0x0a,0x0a,0x0b,0x0b,0x0b,0x0c,0x0c,0x0d,0x0d, -0x0e,0x0e,0x0e,0x0f,0x0f,0x10,0x10,0x10, -}; - -// H-counter table for hvcounter reads in 32col mode -const unsigned char hcounts_32[] = { -0x05,0x05,0x05,0x06,0x06,0x07,0x07,0x07,0x08,0x08,0x08,0x09,0x09,0x09,0x0a,0x0a, -0x0a,0x0b,0x0b,0x0b,0x0c,0x0c,0x0c,0x0d,0x0d,0x0d,0x0e,0x0e,0x0f,0x0f,0x0f,0x10, -0x10,0x10,0x11,0x11,0x11,0x12,0x12,0x12,0x13,0x13,0x13,0x14,0x14,0x14,0x15,0x15, -0x15,0x16,0x16,0x17,0x17,0x17,0x18,0x18,0x18,0x19,0x19,0x19,0x1a,0x1a,0x1a,0x1b, -0x1b,0x1b,0x1c,0x1c,0x1c,0x1d,0x1d,0x1d,0x1e,0x1e,0x1f,0x1f,0x1f,0x20,0x20,0x20, -0x21,0x21,0x21,0x22,0x22,0x22,0x23,0x23,0x23,0x24,0x24,0x24,0x25,0x25,0x26,0x26, -0x26,0x27,0x27,0x27,0x28,0x28,0x28,0x29,0x29,0x29,0x2a,0x2a,0x2a,0x2b,0x2b,0x2b, -0x2c,0x2c,0x2c,0x2d,0x2d,0x2e,0x2e,0x2e,0x2f,0x2f,0x2f,0x30,0x30,0x30,0x31,0x31, -0x31,0x32,0x32,0x32,0x33,0x33,0x33,0x34,0x34,0x34,0x35,0x35,0x36,0x36,0x36,0x37, -0x37,0x37,0x38,0x38,0x38,0x39,0x39,0x39,0x3a,0x3a,0x3a,0x3b,0x3b,0x3b,0x3c,0x3c, -0x3d,0x3d,0x3d,0x3e,0x3e,0x3e,0x3f,0x3f,0x3f,0x40,0x40,0x40,0x41,0x41,0x41,0x42, -0x42,0x42,0x43,0x43,0x43,0x44,0x44,0x45,0x45,0x45,0x46,0x46,0x46,0x47,0x47,0x47, -0x48,0x48,0x48,0x49,0x49,0x49,0x4a,0x4a,0x4a,0x4b,0x4b,0x4b,0x4c,0x4c,0x4d,0x4d, -0x4d,0x4e,0x4e,0x4e,0x4f,0x4f,0x4f,0x50,0x50,0x50,0x51,0x51,0x51,0x52,0x52,0x52, -0x53,0x53,0x53,0x54,0x54,0x55,0x55,0x55,0x56,0x56,0x56,0x57,0x57,0x57,0x58,0x58, -0x58,0x59,0x59,0x59,0x5a,0x5a,0x5a,0x5b,0x5b,0x5c,0x5c,0x5c,0x5d,0x5d,0x5d,0x5e, -0x5e,0x5e,0x5f,0x5f,0x5f,0x60,0x60,0x60,0x61,0x61,0x61,0x62,0x62,0x62,0x63,0x63, -0x64,0x64,0x64,0x65,0x65,0x65,0x66,0x66,0x66,0x67,0x67,0x67,0x68,0x68,0x68,0x69, -0x69,0x69,0x6a,0x6a,0x6a,0x6b,0x6b,0x6c,0x6c,0x6c,0x6d,0x6d,0x6d,0x6e,0x6e,0x6e, -0x6f,0x6f,0x6f,0x70,0x70,0x70,0x71,0x71,0x71,0x72,0x72,0x72,0x73,0x73,0x74,0x74, -0x74,0x75,0x75,0x75,0x76,0x76,0x76,0x77,0x77,0x77,0x78,0x78,0x78,0x79,0x79,0x79, 
-0x7a,0x7a,0x7b,0x7b,0x7b,0x7c,0x7c,0x7c,0x7d,0x7d,0x7d,0x7e,0x7e,0x7e,0x7f,0x7f, -0x7f,0x80,0x80,0x80,0x81,0x81,0x81,0x82,0x82,0x83,0x83,0x83,0x84,0x84,0x84,0x85, -0x85,0x85,0x86,0x86,0x86,0x87,0x87,0x87,0x88,0x88,0x88,0x89,0x89,0x89,0x8a,0x8a, -0x8b,0x8b,0x8b,0x8c,0x8c,0x8c,0x8d,0x8d,0x8d,0x8e,0x8e,0x8e,0x8f,0x8f,0x8f,0x90, -0x90,0x90,0x91,0x91, -0xe8,0xe8,0xe8,0xe9,0xe9,0xe9,0xea,0xea,0xea,0xeb,0xeb,0xeb,0xec,0xec,0xec,0xed, -0xed,0xed,0xee,0xee,0xee,0xef,0xef,0xf0,0xf0,0xf0,0xf1,0xf1,0xf1,0xf2,0xf2,0xf2, -0xf3,0xf3,0xf3,0xf4,0xf4,0xf4,0xf5,0xf5,0xf5,0xf6,0xf6,0xf6,0xf7,0xf7,0xf8,0xf8, -0xf8,0xf9,0xf9,0xf9,0xfa,0xfa,0xfa,0xfb,0xfb,0xfb,0xfc,0xfc,0xfc,0xfd,0xfd,0xfd, -0xfe,0xfe,0xfe,0xff,0xff,0x00,0x00,0x00,0x01,0x01,0x01,0x02,0x02,0x02,0x03,0x03, -0x03,0x04,0x04,0x04, -0x05,0x05,0x05,0x06,0x06,0x07,0x07,0x07,0x08,0x08,0x08,0x09,0x09,0x09,0x0a,0x0a, -0x0a,0x0b,0x0b,0x0b,0x0c,0x0c,0x0c,0x0d, -}; - #ifndef _ASM_MISC_C -typedef struct -{ - int b0; - int b1; - int b2; - int b3; - int b4; - int b5; - int b6; - int b7; -} intblock; - -PICO_INTERNAL_ASM void memcpy16(unsigned short *dest, unsigned short *src, int count) -{ - if ((((long)dest | (long)src) & 3) == 0) - { - if (count >= 32) { - memcpy32((int *)dest, (int *)src, count/2); - count&=1; - } else { - for (; count >= 2; count -= 2, dest+=2, src+=2) - *(int *)dest = *(int *)src; - } - } - while (count--) - *dest++ = *src++; -} - - PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count) { unsigned char *src_ = src; @@ -125,27 +19,23 @@ PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count) } #ifndef _ASM_MISC_C_AMIPS -PICO_INTERNAL_ASM void memcpy32(int *dest, int *src, int count) +PICO_INTERNAL_ASM void memset32(void *dest_in, int c, int count) { - intblock *bd = (intblock *) dest, *bs = (intblock *) src; + int *dest = dest_in; - for (; count >= sizeof(*bd)/4; count -= sizeof(*bd)/4) - *bd++ = *bs++; - - dest = (int *)bd; src = (int *)bs; - while (count--) - 
*dest++ = *src++; -} - - -PICO_INTERNAL_ASM void memset32(int *dest, int c, int count) -{ for (; count >= 8; count -= 8, dest += 8) dest[0] = dest[1] = dest[2] = dest[3] = dest[4] = dest[5] = dest[6] = dest[7] = c; - while (count--) - *dest++ = c; + switch (count) { + case 7: *dest++ = c; + case 6: *dest++ = c; + case 5: *dest++ = c; + case 4: *dest++ = c; + case 3: *dest++ = c; + case 2: *dest++ = c; + case 1: *dest++ = c; + } } void memset32_uncached(int *dest, int c, int count) { memset32(dest, c, count); } #endif diff --git a/pico/misc_amips.s b/pico/misc_amips.s index 3aa70c77..7c3f1ade 100644 --- a/pico/misc_amips.s +++ b/pico/misc_amips.s @@ -101,78 +101,4 @@ ms32u_return: nop -.globl memcpy32 # int *dest, int *src, int count - -memcpy32: -mc32_aloop: - andi $t0, $a0, 0x3f - beqz $t0, mc32_bloop_prep - nop - lw $t1, 0($a1) - addiu $a2, -1 - sw $t1, 0($a0) - beqz $a2, mc32_return - addiu $a0, 4 - j mc32_aloop - addiu $a1, 4 - -mc32_bloop_prep: - srl $t0, $a2, 4 # we will do 64 bytes per iteration (cache line) - beqz $t0, mc32_bloop_end - -mc32_bloop: - addiu $t0, -1 - cache 0x18, ($a0) # create dirty exclusive - lw $t2, 0x00($a1) - lw $t3, 0x04($a1) - lw $t4, 0x08($a1) - lw $t5, 0x0c($a1) - lw $t6, 0x10($a1) - lw $t7, 0x14($a1) - lw $t8, 0x18($a1) - lw $t9, 0x1c($a1) - sw $t2, 0x00($a0) - sw $t3, 0x04($a0) - sw $t4, 0x08($a0) - sw $t5, 0x0c($a0) - sw $t6, 0x10($a0) - sw $t7, 0x14($a0) - sw $t8, 0x18($a0) - sw $t9, 0x1c($a0) - lw $t2, 0x20($a1) - lw $t3, 0x24($a1) - lw $t4, 0x28($a1) - lw $t5, 0x2c($a1) - lw $t6, 0x30($a1) - lw $t7, 0x34($a1) - lw $t8, 0x38($a1) - lw $t9, 0x3c($a1) - sw $t2, 0x20($a0) - sw $t3, 0x24($a0) - sw $t4, 0x28($a0) - sw $t5, 0x2c($a0) - sw $t6, 0x30($a0) - sw $t7, 0x34($a0) - sw $t8, 0x38($a0) - sw $t9, 0x3c($a0) - addiu $a0, 0x40 - bnez $t0, mc32_bloop - addiu $a1, 0x40 - -mc32_bloop_end: - andi $a2, $a2, 0x0f - beqz $a2, mc32_return - -mc32_cloop: - lw $t1, 0($a1) - addiu $a2, -1 - addiu $a1, 4 - sw $t1, 0($a0) - bnez $a2, 
mc32_cloop - addiu $a0, 4 - -mc32_return: - jr $ra - nop - # vim:filetype=mips diff --git a/pico/misc_arm.s b/pico/misc_arm.s index 15662a7b..56c74019 100644 --- a/pico/misc_arm.s +++ b/pico/misc_arm.s @@ -6,44 +6,6 @@ * See COPYING file in the top-level directory. */ -.global memcpy16 @ unsigned short *dest, unsigned short *src, int count - -memcpy16: - eor r3, r0, r1 - tst r3, #2 - bne mcp16_cant_align - - tst r0, #2 - ldrneh r3, [r1], #2 - subne r2, r2, #1 - strneh r3, [r0], #2 - - subs r2, r2, #4 - bmi mcp16_fin - -mcp16_loop: - ldmia r1!, {r3,r12} - subs r2, r2, #4 - stmia r0!, {r3,r12} - bpl mcp16_loop - -mcp16_fin: - tst r2, #2 - ldrne r3, [r1], #4 - strne r3, [r0], #4 - ands r2, r2, #1 - bxeq lr - -mcp16_cant_align: - ldrh r3, [r1], #2 - subs r2, r2, #1 - strh r3, [r0], #2 - bne mcp16_cant_align - - bx lr - - - @ 0x12345678 -> 0x34127856 @ r4=temp, lr=0x00ff00ff .macro bswap reg @@ -52,7 +14,6 @@ mcp16_cant_align: orr \reg, \reg, r4, lsl #8 .endm - @ dest must be halfword aligned, src can be unaligned .global memcpy16bswap @ unsigned short *dest, void *src, int count @@ -121,37 +82,6 @@ mcp16bs_cant_align2: bx lr - -.global memcpy32 @ int *dest, int *src, int count - -memcpy32: - stmfd sp!, {r4,lr} - - subs r2, r2, #4 - bmi mcp32_fin - -mcp32_loop: - ldmia r1!, {r3,r4,r12,lr} - subs r2, r2, #4 - stmia r0!, {r3,r4,r12,lr} - bpl mcp32_loop - -mcp32_fin: - tst r2, #3 - ldmeqfd sp!, {r4,pc} - tst r2, #1 - ldrne r3, [r1], #4 - strne r3, [r0], #4 - -mcp32_no_unal1: - tst r2, #2 - ldmneia r1!, {r3,r12} - ldmfd sp!, {r4,lr} - stmneia r0!, {r3,r12} - bx lr - - - .global memset32 @ int *dest, int c, int count memset32: diff --git a/pico/mode4.c b/pico/mode4.c index 22e30419..38002536 100644 --- a/pico/mode4.c +++ b/pico/mode4.c @@ -1,302 +1,942 @@ /* - * mode4/SMS renderer + * SMS renderer * (C) notaz, 2009-2010 + * (C) irixxxx, 2020-2024 + * + * currently supports VDP mode 4 (SMS and GG) and mode 3-0 (TMS) + * modes numbered after the bit numbers used in Sega and TI 
documentation * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. */ -/* - * TODO: - * - TMS9918 modes? - * - gg mode? - * - column scroll (reg 0 bit7) - * - 224/240 line modes - * - doubled sprites - */ #include "pico_int.h" +#include -static void (*FinalizeLineM4)(int line); +static void (*FinalizeLineSMS)(int line); static int skip_next_line; -static int screen_offset; +static int screen_offset, line_offset; +static u8 mode; -#define PLANAR_PIXEL(x,p) \ - t = pack & (0x80808080 >> p); \ - if (t) { \ - t = ((t >> (7-p)) | (t >> (14-p)) | (t >> (21-p)) | (t >> (28-p))) & 0x0f; \ +static unsigned int sprites_addr[32]; // bitmap address +static unsigned char sprites_c[32]; // TMS sprites color +static int sprites_x[32]; // x position +static int sprites; // count +static unsigned char sprites_map[2+256/8+2]; // collision detection map + +unsigned int sprites_status; + +/* sprite collision detection */ +static int CollisionDetect(u8 *mb, u16 sx, unsigned int pack, int zoomed) +{ + static u8 morton[16] = { 0x00,0x03,0x0c,0x0f,0x30,0x33,0x3c,0x3f, + 0xc0,0xc3,0xcc,0xcf,0xf0,0xf3,0xfc,0xff }; + u8 *mp = mb + (sx>>3); + unsigned col, m; + + // check sprite map for collision and update map with current sprite + if (!zoomed) { // 8 sprite pixels + m = mp[0] | (mp[1]<<8); + col = m & (pack<<(sx&7)); // collision if current sprite overlaps sprite map + m |= pack<<(sx&7); + mp[0] = m, mp[1] = m>>8; + } else { // 16 sprite pixels in zoom mode + pack = morton[pack&0x0f] | (morton[(pack>>4)&0x0f] << 8); + m = mp[0] | (mp[1]<<8) | (mp[2]<<16); + col = m & (pack<<(sx&7)); + m |= pack<<(sx&7); + mp[0] = m, mp[1] = m>>8, mp[2] = m>>16; + } + + // invisible overscan area, not tested for collision + mb[0] = mb[33] = mb[34] = 0; + return col; +} + +/* Mode 4 - SMS Graphics */ +/*=======================*/ + +static void TileBGM4(u16 sx, int pal) +{ + if (sx & 3) { + u8 *pd = (u8 *)(Pico.est.HighCol + sx); + pd[0] = pd[1] = pd[2] = 
pd[3] = pal; + pd[4] = pd[5] = pd[6] = pd[7] = pal; + } else { + u32 *pd = (u32 *)(Pico.est.HighCol + sx); + pd[0] = pd[1] = pal * 0x01010101; + } +} + +// 8 pixels are arranged in 4 bitplane bytes in a 32 bit word. To pull the +// 4 bitplanes together multiply with each bit distance (multiples of 1<<7) +#define PLANAR_PIXELBG(x,p) \ + t = (pack>>(7-p)) & 0x01010101; \ + t = (t*0x10204080) >> 28; \ + pd[x] = pal|t; + +static void TileNormBGM4(u16 sx, unsigned int pack, int pal) +{ + u8 *pd = Pico.est.HighCol + sx; + u32 t; + + PLANAR_PIXELBG(0, 0) + PLANAR_PIXELBG(1, 1) + PLANAR_PIXELBG(2, 2) + PLANAR_PIXELBG(3, 3) + PLANAR_PIXELBG(4, 4) + PLANAR_PIXELBG(5, 5) + PLANAR_PIXELBG(6, 6) + PLANAR_PIXELBG(7, 7) +} + +static void TileFlipBGM4(u16 sx, unsigned int pack, int pal) +{ + u8 *pd = Pico.est.HighCol + sx; + u32 t; + + PLANAR_PIXELBG(0, 7) + PLANAR_PIXELBG(1, 6) + PLANAR_PIXELBG(2, 5) + PLANAR_PIXELBG(3, 4) + PLANAR_PIXELBG(4, 3) + PLANAR_PIXELBG(5, 2) + PLANAR_PIXELBG(6, 1) + PLANAR_PIXELBG(7, 0) +} + +// non-transparent sprite pixels apply if no higher prio pixel is already there +#define PLANAR_PIXELSP(x,p) \ + t = (pack>>(7-p)) & 0x01010101; \ + if (t && (pd[x] & 0x2f) <= 0x20) { \ + t = (t*0x10204080) >> 28; \ pd[x] = pal|t; \ } -static int TileNormM4(int sx, int addr, int pal) +static void TileNormSprM4(u16 sx, unsigned int pack, int pal) { - unsigned char *pd = HighCol + sx; - unsigned int pack, t; + u8 *pd = Pico.est.HighCol + sx; + u32 t; - pack = *(unsigned int *)(Pico.vram + addr); /* Get 4 bitplanes / 8 pixels */ - if (pack) - { - PLANAR_PIXEL(0, 0) - PLANAR_PIXEL(1, 1) - PLANAR_PIXEL(2, 2) - PLANAR_PIXEL(3, 3) - PLANAR_PIXEL(4, 4) - PLANAR_PIXEL(5, 5) - PLANAR_PIXEL(6, 6) - PLANAR_PIXEL(7, 7) - return 0; - } - - return 1; /* Tile blank */ + PLANAR_PIXELSP(0, 0) + PLANAR_PIXELSP(1, 1) + PLANAR_PIXELSP(2, 2) + PLANAR_PIXELSP(3, 3) + PLANAR_PIXELSP(4, 4) + PLANAR_PIXELSP(5, 5) + PLANAR_PIXELSP(6, 6) + PLANAR_PIXELSP(7, 7) } -static int TileFlipM4(int 
sx,int addr,int pal) +static void TileDoubleSprM4(int sx, unsigned int pack, int pal) { - unsigned char *pd = HighCol + sx; - unsigned int pack, t; + u8 *pd = Pico.est.HighCol + sx; + u32 t; - pack = *(unsigned int *)(Pico.vram + addr); /* Get 4 bitplanes / 8 pixels */ - if (pack) - { - PLANAR_PIXEL(0, 7) - PLANAR_PIXEL(1, 6) - PLANAR_PIXEL(2, 5) - PLANAR_PIXEL(3, 4) - PLANAR_PIXEL(4, 3) - PLANAR_PIXEL(5, 2) - PLANAR_PIXEL(6, 1) - PLANAR_PIXEL(7, 0) - return 0; - } - - return 1; /* Tile blank */ + PLANAR_PIXELSP(0, 0) + PLANAR_PIXELSP(1, 0) + PLANAR_PIXELSP(2, 1) + PLANAR_PIXELSP(3, 1) + PLANAR_PIXELSP(4, 2) + PLANAR_PIXELSP(5, 2) + PLANAR_PIXELSP(6, 3) + PLANAR_PIXELSP(7, 3) + PLANAR_PIXELSP(8, 4) + PLANAR_PIXELSP(9, 4) + PLANAR_PIXELSP(10, 5) + PLANAR_PIXELSP(11, 5) + PLANAR_PIXELSP(12, 6) + PLANAR_PIXELSP(13, 6) + PLANAR_PIXELSP(14, 7) + PLANAR_PIXELSP(15, 7) } -static void draw_sprites(int scanline) +static void ParseSpritesM4(int scanline) { struct PicoVideo *pv = &Pico.video; - unsigned int sprites_addr[8]; - unsigned int sprites_x[8]; - unsigned char *sat; - int xoff = 8; // relative to HighCol, which is (screen - 8) + u8 *sat; + int xoff = line_offset; int sprite_base, addr_mask; - int i, s, h; + int zoomed = pv->reg[1] & 0x1; // zoomed sprites, e.g. 
Earthworm Jim + unsigned int pack; + int i, s, h, m; if (pv->reg[0] & 8) - xoff = 0; + xoff -= 8; // sprite shift + if (Pico.m.hardware & PMS_HW_LCD) + xoff -= 48; // GG LCD, adjust to center 160 px - sat = (unsigned char *)Pico.vram + ((pv->reg[5] & 0x7e) << 7); + sat = (u8 *)PicoMem.vram + ((pv->reg[5] & 0x7e) << 7); if (pv->reg[1] & 2) { addr_mask = 0xfe; h = 16; } else { addr_mask = 0xff; h = 8; } + if (zoomed) h *= 2; sprite_base = (pv->reg[6] & 4) << (13-2-1); - for (i = s = 0; i < 64 && s < 8; i++) + m = pv->status & SR_C; + memset(sprites_map, 0, sizeof(sprites_map)); + for (i = s = 0; i < 64; i++) { int y; - y = sat[i] + 1; - if (y == 0xd1) + y = sat[MEM_LE2(i)]; + if (y == 0xd0 && !((pv->reg[0] & 6) == 6 && (pv->reg[1] & 0x18))) break; + if (y >= 0xe0) + y -= 256; + y &= ~zoomed; // zoomed sprites apparently only on even lines, see GG Tarzan if (y + h <= scanline || scanline < y) continue; // not on this line + if (s >= 8) { + if (scanline >= 0) sprites_status |= SR_SOVR; + if (!(PicoIn.opt & POPT_DIS_SPRITE_LIM) || s >= 32) + break; + } - sprites_x[s] = xoff + sat[0x80 + i*2]; - sprites_addr[s] = sprite_base + ((sat[0x80 + i*2 + 1] & addr_mask) << (5-1)) + - ((scanline - y) << (2-1)); - s++; + if (xoff + sat[MEM_LE2(0x80 + i*2)] >= 0) { + sprites_x[s] = xoff + sat[MEM_LE2(0x80 + i*2)]; + sprites_addr[s] = sprite_base + ((sat[MEM_LE2(0x80 + i*2 + 1)] & addr_mask) << (5-1)) + + ((scanline - y) >> zoomed << (2-1)); + if (Pico.video.reg[1] & 0x40) { + // collision detection. 
Do it here since off-screen lines aren't drawn + pack = CPU_LE2(*(u32 *)(PicoMem.vram + sprites_addr[s])); + // make sprite pixel map by merging the 4 bitplanes + pack = ((pack | (pack>>16)) | ((pack | (pack>>16))>>8)) & 0xff; + if (!m) m = CollisionDetect(sprites_map, sprites_x[s], pack, zoomed); + // no collision detection in 1st column if it's masked + if (pv->reg[0] & 0x20) + sprites_map[1] = 0; + } + s++; + } } - - // now draw all sprites backwards - for (--s; s >= 0; s--) - TileNormM4(sprites_x[s], sprites_addr[s], 0x10); + if (m) + sprites_status |= SR_C; + sprites = s; } -// tilex_ty_prio merged to reduce register pressure -static void draw_strip(const unsigned short *nametab, int dx, int cells, int tilex_ty_prio) +static void DrawSpritesM4(void) { - int oldcode = -1, blank = -1; // The tile we know is blank + struct PicoVideo *pv = &Pico.video; + unsigned int pack; + int zoomed = pv->reg[1] & 0x1; // zoomed sprites, e.g. Earthworm Jim + int s = sprites; + + // now draw all sprites backwards + for (--s; s >= 0; s--) { + pack = CPU_LE2(*(u32 *)(PicoMem.vram + sprites_addr[s])); + if (zoomed) TileDoubleSprM4(sprites_x[s], pack, 0x10); + else TileNormSprM4(sprites_x[s], pack, 0x10); + } +} + +// cells_dx, tilex_ty merged to reduce register pressure +static void DrawStripM4(const u16 *nametab, int cells_dx, int tilex_ty) +{ + int oldcode = -1; int addr = 0, pal = 0; // Draw tiles across screen: - for (; cells > 0; dx += 8, tilex_ty_prio++, cells--) + for (; cells_dx >= 0; cells_dx += 8, tilex_ty++, cells_dx -= 0x10000) { - int code, zero; + unsigned int pack; + unsigned code; - code = nametab[tilex_ty_prio & 0x1f]; - if (code == blank) - continue; - if ((code ^ tilex_ty_prio) & 0x1000) // priority differs? 
- continue; + code = nametab[tilex_ty & 0x1f]; if (code != oldcode) { oldcode = code; // Get tile address/2: addr = (code & 0x1ff) << 4; - addr += tilex_ty_prio >> 16; + addr += tilex_ty >> 16; if (code & 0x0400) addr ^= 0xe; // Y-flip - pal = (code>>7) & 0x10; + pal = (code>>7) & 0x30; // prio | palette select } - if (code&0x0200) zero = TileFlipM4(dx, addr, pal); - else zero = TileNormM4(dx, addr, pal); - - if (zero) - blank = code; // We know this tile is blank now + pack = CPU_LE2(*(u32 *)(PicoMem.vram + addr)); // Get 4 bitplanes / 8 pixels + if (pack == 0) TileBGM4(cells_dx, pal); + else if (code & 0x0200) TileFlipBGM4(cells_dx, pack, pal); + else TileNormBGM4(cells_dx, pack, pal); } } static void DrawDisplayM4(int scanline) { struct PicoVideo *pv = &Pico.video; - unsigned short *nametab; + u16 *nametab, *nametab2; int line, tilex, dx, ty, cells; int cellskip = 0; // XXX int maxcells = 32; // Find the line in the name table line = pv->reg[9] + scanline; // vscroll + scanline - if (line >= 224) - line -= 224; // Find name table: - nametab = Pico.vram; - nametab += (pv->reg[2] & 0x0e) << (10-1); - nametab += (line>>3) << (6-1); + nametab = PicoMem.vram; + if ((pv->reg[0] & 6) == 6 && (pv->reg[1] & 0x18)) { + // 224/240 line mode + line &= 0xff; + nametab += ((pv->reg[2] & 0x0c) << (10-1)) + (0x700 >> 1); + } else { + while (line >= 224) line -= 224; + nametab += (pv->reg[2] & 0x0e) << (10-1); + // old SMS only, masks line:7 with reg[2]:0 for address calculation + //if ((pv->reg[2] & 0x01) == 0) line &= 0x7f; + } + nametab2 = nametab + ((scanline>>3) << (6-1)); + nametab = nametab + ((line>>3) << (6-1)); dx = pv->reg[8]; // hscroll if (scanline < 16 && (pv->reg[0] & 0x40)) - dx = 0; // hscroll disabled for top 2 rows + dx = 0; // hscroll disabled for top 2 rows (e.g. 
Fantasy Zone II) - tilex = ((-dx >> 3) + cellskip) & 0x1f; + tilex = (32 - (dx >> 3) + cellskip) & 0x1f; ty = (line & 7) << 1; // Y-Offset into tile - cells = maxcells - cellskip; + cells = maxcells - cellskip - 1; - dx = ((dx - 1) & 7) + 1; - if (dx != 8) - cells++; // have hscroll, need to draw 1 cell more + dx = (dx & 7); dx += cellskip << 3; + dx += line_offset; - // low priority tiles - if (PicoDrawMask & PDRAW_LAYERB_ON) - draw_strip(nametab, dx, cells, tilex | 0x0000 | (ty << 16)); + // tiles + if (!(pv->debug_p & PVD_KILL_B)) { + if (Pico.m.hardware & PMS_HW_LCD) { + // on GG render only the center 160 px, but mind hscroll + DrawStripM4(nametab , (dx-8) | ((cells-11)<< 16),(tilex+5) | (ty << 16)); + } else if (pv->reg[0] & 0x80) { + // vscroll disabled for rightmost 8 columns (e.g. Gauntlet) + int dx2 = dx + (cells-8)*8, tilex2 = tilex + (cells-8), ty2 = scanline&7; + DrawStripM4(nametab, dx | ((cells-8) << 16), tilex | (ty << 16)); + DrawStripM4(nametab2, dx2 | (8 << 16), tilex2 | (ty2 << 17)); + } else + DrawStripM4(nametab , dx | ( cells << 16), tilex | (ty << 16)); + } // sprites - if (PicoDrawMask & PDRAW_SPRITES_LOW_ON) - draw_sprites(scanline); + if (!(pv->debug_p & PVD_KILL_S_LO)) + DrawSpritesM4(); - // high priority tiles (use virtual layer switch just for fun) - if (PicoDrawMask & PDRAW_LAYERA_ON) - draw_strip(nametab, dx, cells, tilex | 0x1000 | (ty << 16)); - - if (pv->reg[0] & 0x20) - // first column masked - ((int *)HighCol)[2] = ((int *)HighCol)[3] = 0xe0e0e0e0; + if ((pv->reg[0] & 0x20) && !(Pico.m.hardware & PMS_HW_LCD)) { + // first column masked with background, calculate offset to start of line + dx = line_offset / 4; + ty = ((pv->reg[7]&0x0f)|0x10) * 0x01010101; + ((u32 *)Pico.est.HighCol)[dx] = ((u32 *)Pico.est.HighCol)[dx+1] = ty; + } } -void PicoFrameStartMode4(void) -{ - int lines = 192; - skip_next_line = 0; - screen_offset = 24; - rendstatus = PDRAW_32_COLS; - if ((Pico.video.reg[0] & 6) == 6 && (Pico.video.reg[1] & 0x18)) { - if 
(Pico.video.reg[1] & 0x08) { - screen_offset = 0; - lines = 240; +/* TMS Modes */ +/*===========*/ + +/* Background */ + +#define TMS_PIXELBG(x,p) \ + t = (pack>>(7-p)) & 0x01; \ + t = (pal >> (t << 2)) & 0x0f; \ + if (t) \ + pd[x] = t; + +static void TileNormBgM1(u16 sx, unsigned int pack, int pal) /* Text */ +{ + u8 *pd = Pico.est.HighCol + sx; + unsigned int t; + + TMS_PIXELBG(0, 0) + TMS_PIXELBG(1, 1) + TMS_PIXELBG(2, 2) + TMS_PIXELBG(3, 3) + TMS_PIXELBG(4, 4) + TMS_PIXELBG(5, 5) +} + +static void TileNormBgM2(u16 sx, int pal) /* Multicolor */ +{ + u8 *pd = Pico.est.HighCol + sx; + unsigned int pack = 0xf0; + unsigned int t; + + TMS_PIXELBG(0, 0) + TMS_PIXELBG(1, 1) + TMS_PIXELBG(2, 2) + TMS_PIXELBG(3, 3) + TMS_PIXELBG(4, 4) + TMS_PIXELBG(5, 5) + TMS_PIXELBG(6, 6) + TMS_PIXELBG(7, 7) +} + +static void TileNormBgMg(u16 sx, unsigned int pack, int pal) /* Graphics */ +{ + u8 *pd = Pico.est.HighCol + sx; + unsigned int t; + + TMS_PIXELBG(0, 0) + TMS_PIXELBG(1, 1) + TMS_PIXELBG(2, 2) + TMS_PIXELBG(3, 3) + TMS_PIXELBG(4, 4) + TMS_PIXELBG(5, 5) + TMS_PIXELBG(6, 6) + TMS_PIXELBG(7, 7) +} + +/* Sprites */ + +#define TMS_PIXELSP(x,p) \ + t = (pack>>(7-p)) & 0x01; \ + if (t) \ + pd[x] = pal; + +static void TileNormSprTMS(u16 sx, unsigned int pack, int pal) +{ + u8 *pd = Pico.est.HighCol + sx; + unsigned int t; + + TMS_PIXELSP(0, 0) + TMS_PIXELSP(1, 1) + TMS_PIXELSP(2, 2) + TMS_PIXELSP(3, 3) + TMS_PIXELSP(4, 4) + TMS_PIXELSP(5, 5) + TMS_PIXELSP(6, 6) + TMS_PIXELSP(7, 7) +} + +static void TileDoubleSprTMS(u16 sx, unsigned int pack, int pal) +{ + u8 *pd = Pico.est.HighCol + sx; + unsigned int t; + + TMS_PIXELSP(0, 0) + TMS_PIXELSP(1, 0) + TMS_PIXELSP(2, 1) + TMS_PIXELSP(3, 1) + TMS_PIXELSP(4, 2) + TMS_PIXELSP(5, 2) + TMS_PIXELSP(6, 3) + TMS_PIXELSP(7, 3) + TMS_PIXELSP(8, 4) + TMS_PIXELSP(9, 4) + TMS_PIXELSP(10, 5) + TMS_PIXELSP(11, 5) + TMS_PIXELSP(12, 6) + TMS_PIXELSP(13, 6) + TMS_PIXELSP(14, 7) + TMS_PIXELSP(15, 7) +} + +static void ParseSpritesTMS(int scanline) +{ + 
struct PicoVideo *pv = &Pico.video; + unsigned int pack; + u8 *sat; + int xoff; + int sprite_base, addr_mask; + int zoomed = pv->reg[1] & 0x1; // zoomed sprites + int i, s, h, m; + + xoff = line_offset; + + sat = (u8 *)PicoMem.vramb + ((pv->reg[5] & 0x7e) << 7); + if (pv->reg[1] & 2) { + addr_mask = 0xfc; h = 16; + } else { + addr_mask = 0xff; h = 8; + } + if (zoomed) h *= 2; + sprite_base = (pv->reg[6] & 0x7) << 11; + + m = pv->status & SR_C; + memset(sprites_map, 0, sizeof(sprites_map)); + /* find sprites on this scanline */ + for (i = s = 0; i < 32; i++) + { + int x, y; + y = sat[MEM_LE2(4*i)]; + if (y == 0xd0) + break; + if (y >= 0xe0) + y -= 256; + y &= ~zoomed; + if (y + h <= scanline || scanline < y) + continue; // not on this line + if (s >= 4) { + if (scanline >= 0) sprites_status |= SR_SOVR | i; + if (!(PicoIn.opt & POPT_DIS_SPRITE_LIM) || s >= 32) + break; } - else { - screen_offset = 8; - lines = 224; + x = sat[MEM_LE2(4*i+1)] + xoff; + if (sat[MEM_LE2(4*i+3)] & 0x80) + x -= 32; + + sprites_c[s] = sat[MEM_LE2(4*i+3)] & 0x0f; + sprites_x[s] = x; + sprites_addr[s] = sprite_base + ((sat[MEM_LE2(4*i + 2)] & addr_mask) << 3) + + ((scanline - y) >> zoomed); + if (Pico.video.reg[1] & 0x40) { + // collision detection. Do it here since off-screen lines aren't drawn + if (sprites_c[s] && x > 0) { + pack = PicoMem.vramb[MEM_LE2(sprites_addr[s])]; + if (!m) m = CollisionDetect(sprites_map, x, pack, zoomed); + } + x += (zoomed ? 
16:8); + if (sprites_c[s] && (pv->reg[1] & 0x2) && x > 0 && x < 8+256) { + pack = PicoMem.vramb[MEM_LE2(sprites_addr[s]+0x10)]; + if (!m) m = CollisionDetect(sprites_map, x, pack, zoomed); + } + } + s++; + } + if (m) + sprites_status |= SR_C; + sprites = s; +} + +/* Draw sprites into a scanline, max 4 */ +static void DrawSpritesTMS(void) +{ + struct PicoVideo *pv = &Pico.video; + unsigned int pack; + int zoomed = pv->reg[1] & 0x1; // zoomed sprites + int s = sprites; + + // now draw all sprites backwards + for (--s; s >= 0; s--) { + int x, c, w = (zoomed ? 16: 8); + x = sprites_x[s]; + c = sprites_c[s]; + // c may be 0 (transparent): sprite invisible + if (c && x > 0) { + pack = PicoMem.vramb[MEM_LE2(sprites_addr[s])]; + if (zoomed) TileDoubleSprTMS(x, pack, c); + else TileNormSprTMS(x, pack, c); + } + if (c && (pv->reg[1] & 0x2) && (x+=w) > 0 && x < 8+256) { + pack = PicoMem.vramb[MEM_LE2(sprites_addr[s]+0x10)]; + if (zoomed) TileDoubleSprTMS(x, pack, c); + else TileNormSprTMS(x, pack, c); + } + } +} + + +/* Mode 1 - Text */ +/*===============*/ + +/* Draw the background into a scanline; cells, dx, tilex, ty merged to reduce registers */ +static void DrawStripM1(const u8 *nametab, const u8 *pattab, int cells_dx, int tilex_ty) +{ + // Draw tiles across screen: + for (; cells_dx >= 0; cells_dx += 6, tilex_ty++, cells_dx -= 0x10000) + { + unsigned int pack, pal; + unsigned code; + + code = nametab[tilex_ty & 0x3f]; + pal = Pico.video.reg[7]; + pack = pattab[code << 3]; + TileNormBgM1(cells_dx, pack, pal); + } +} + +/* Draw a scanline */ +static void DrawDisplayM1(int scanline) +{ + struct PicoVideo *pv = &Pico.video; + u8 *nametab, *pattab; + int tilex, dx, cells; + int cellskip = 0; // XXX + int maxcells = 40; + unsigned mask = pv->reg[0] & 0x2 ? 
0x2000 : 0x3800; // M3: 2 bits table select + + // name, color, pattern table: + nametab = PicoMem.vramb + ((pv->reg[2]<<10) & 0x3c00); + pattab = PicoMem.vramb + ((pv->reg[4]<<11) & mask); + pattab += ((scanline>>6) << 11) & ~mask; // table select bits for M3 + + nametab += ((scanline>>3) * maxcells); + pattab += (scanline & 0x7); + + tilex = cellskip & 0x1f; + cells = maxcells - cellskip - 1; + dx = 8 + (cellskip << 3) + line_offset; + + // tiles + if (!(pv->debug_p & PVD_KILL_B)) + DrawStripM1(nametab, pattab, dx | (cells << 16), tilex | (scanline << 16)); +} + + +/* Mode 2 - Multicolor */ +/*=====================*/ + +/* Draw the background into a scanline; cells, dx, tilex, ty merged to reduce registers */ +static void DrawStripM2(const u8 *nametab, const u8 *pattab, int cells_dx, int tilex_ty) +{ + // Draw tiles across screen: + for (; cells_dx >= 0; cells_dx += 8, tilex_ty++, cells_dx -= 0x10000) + { + unsigned int pal; + unsigned code; + + code = nametab[tilex_ty & 0x1f]; + pal = pattab[code << 3]; + TileNormBgM2(cells_dx, pal); + } +} + +/* Draw a scanline */ +static void DrawDisplayM2(int scanline) +{ + struct PicoVideo *pv = &Pico.video; + u8 *nametab, *pattab; + int tilex, dx, cells; + int cellskip = 0; // XXX + int maxcells = 32; + unsigned mask = pv->reg[0] & 0x2 ? 
0x2000 : 0x3800; // M3: 2 bits table select + + // name, color, pattern table: + nametab = PicoMem.vramb + ((pv->reg[2]<<10) & 0x3c00); + pattab = PicoMem.vramb + ((pv->reg[4]<<11) & mask); + pattab += ((scanline>>6) << 11) & ~mask; // table select bits for M3 + + nametab += (scanline>>3) << 5; + pattab += (scanline>>2) & 0x7; + + tilex = cellskip & 0x1f; + cells = maxcells - cellskip - 1; + dx = (cellskip << 3) + line_offset; + + // tiles + if (!(pv->debug_p & PVD_KILL_B)) + DrawStripM2(nametab, pattab, dx | (cells << 16), tilex | (scanline << 16)); + + // sprites + if (!(pv->debug_p & PVD_KILL_S_LO)) + DrawSpritesTMS(); +} + + +/* Mode 3 - Graphics II */ +/*======================*/ + +/* Draw the background into a scanline; cells, dx, tilex, ty merged to reduce registers */ +static void DrawStripM3(const u8 *nametab, const u8 *coltab, const u8 *pattab, int cells_dx, int tilex_ty) +{ + // Draw tiles across screen: + for (; cells_dx >= 0; cells_dx += 8, tilex_ty++, cells_dx -= 0x10000) + { + unsigned int pack, pal; + unsigned code; + + code = nametab[tilex_ty & 0x1f] << 3; + pal = coltab[code]; + pack = pattab[code]; + TileNormBgMg(cells_dx, pack, pal); + } +} + +/* Draw a scanline */ +static void DrawDisplayM3(int scanline) +{ + struct PicoVideo *pv = &Pico.video; + u8 *nametab, *coltab, *pattab; + int tilex, dx, cells; + int cellskip = 0; // XXX + int maxcells = 32; + + // name, color, pattern table: + nametab = PicoMem.vramb + ((pv->reg[2]<<10) & 0x3c00); + coltab = PicoMem.vramb + ((pv->reg[3]<< 6) & 0x2000); + pattab = PicoMem.vramb + ((pv->reg[4]<<11) & 0x2000); + + nametab += ((scanline>>3) << 5); + coltab += ((scanline>>6) <<11) + (scanline & 0x7); + pattab += ((scanline>>6) <<11) + (scanline & 0x7); + + tilex = cellskip & 0x1f; + cells = maxcells - cellskip - 1; + dx = (cellskip << 3) + line_offset; + + // tiles + if (!(pv->debug_p & PVD_KILL_B)) + DrawStripM3(nametab, coltab, pattab, dx | (cells << 16), tilex | (scanline << 16)); + + // sprites + if 
(!(pv->debug_p & PVD_KILL_S_LO)) + DrawSpritesTMS(); +} + + +/* Mode 0 - Graphics I */ +/*=====================*/ + +/* Draw the background into a scanline; cells, dx, tilex, ty merged to reduce registers */ +static void DrawStripM0(const u8 *nametab, const u8 *coltab, const u8 *pattab, int cells_dx, int tilex_ty) +{ + // Draw tiles across screen: + for (; cells_dx >= 0; cells_dx += 8, tilex_ty++, cells_dx -= 0x10000) + { + unsigned int pack, pal; + unsigned code; + + code = nametab[tilex_ty & 0x1f]; + pal = coltab[code >> 3]; + pack = pattab[code << 3]; + TileNormBgMg(cells_dx, pack, pal); + } +} + +/* Draw a scanline */ +static void DrawDisplayM0(int scanline) +{ + struct PicoVideo *pv = &Pico.video; + u8 *nametab, *coltab, *pattab; + int tilex, dx, cells; + int cellskip = 0; // XXX + int maxcells = 32; + + // name, color, pattern table: + nametab = PicoMem.vramb + ((pv->reg[2]<<10) & 0x3c00); + coltab = PicoMem.vramb + ((pv->reg[3]<< 6) & 0x3fc0); + pattab = PicoMem.vramb + ((pv->reg[4]<<11) & 0x3800); + + nametab += (scanline>>3) << 5; + pattab += (scanline & 0x7); + + tilex = cellskip & 0x1f; + cells = maxcells - cellskip - 1; + dx = (cellskip << 3) + line_offset; + + // tiles + if (!(pv->debug_p & PVD_KILL_B)) + DrawStripM0(nametab, coltab, pattab, dx | (cells << 16), tilex | (scanline << 16)); + + // sprites + if (!(pv->debug_p & PVD_KILL_S_LO)) + DrawSpritesTMS(); +} + + +/* Common/global */ +/*===============*/ + +static void FinalizeLineRGB555SMS(int line); +static void FinalizeLine8bitSMS(int line); + +void PicoFrameStartSMS(void) +{ + struct PicoEState *est = &Pico.est; + int lines = 192, columns = 256, loffs, coffs; + + skip_next_line = 0; + loffs = screen_offset = 24; // 192 lines is really 224 with top/bottom bars + est->rendstatus = PDRAW_32_COLS; + + // if mode changes make palette dirty since some modes switch to a fixed one + if (mode != ((Pico.video.reg[0]&0x06) | (Pico.video.reg[1]&0x18))) { + mode = (Pico.video.reg[0]&0x06) | 
(Pico.video.reg[1]&0x18); + Pico.m.dirtyPal = 1; + } + + Pico.m.hardware &= ~PMS_HW_TMS; + if (PicoIn.tmsPalette || (PicoIn.AHW & (PAHW_SG|PAHW_SC))) + Pico.m.hardware |= PMS_HW_TMS; + + // Copy LCD enable flag for easier handling + Pico.m.hardware &= ~PMS_HW_LCD; + if ((PicoIn.opt & POPT_EN_GG_LCD) && (PicoIn.AHW & PAHW_GG)) { + Pico.m.hardware |= PMS_HW_LCD; + + // GG LCD always has 160x144 regardless of settings + screen_offset = 24; // nonetheless the vdp timing has 224 lines + loffs = 48; + lines = 144; + columns = 160; + } else { + if ((mode & 4) && (Pico.video.reg[0] & 0x20)) { + // SMS mode 4 with 1st column blanked + est->rendstatus |= PDRAW_SMS_BLANK_1; + columns = 248; + } + + switch (mode) { + // SMS2 only 224/240 line modes, e.g. Micro Machines + case 0x06|0x08: + est->rendstatus |= PDRAW_30_ROWS; + loffs = screen_offset = 0; + lines = 240; + break; + case 0x06|0x10: + loffs = screen_offset = 8; + lines = 224; + break; } } - if (rendstatus != rendstatus_old || lines != rendlines) { - emu_video_mode_change(screen_offset, lines, 1); + line_offset = 8; // FinalizeLine requires HighCol+8 + // ugh... nonetheless has offset in 8-bit fast mode if 1st col blanked! + coffs = (FinalizeLineSMS == NULL && columns == 248 ? 8 : 0); + if (FinalizeLineSMS != NULL && (PicoIn.opt & POPT_EN_SOFTSCALE)) { + // softscaling always generates 320px, but no scaling in 8bit fast + est->rendstatus |= PDRAW_SOFTSCALE; + coffs = 0; + columns = 320; + } else if (!(PicoIn.opt & POPT_DIS_32C_BORDER)) { + est->rendstatus |= PDRAW_BORDER_32; + line_offset -= coffs; + coffs = (320-columns) / 2; + if (FinalizeLineSMS == NULL) + line_offset += coffs; // ... 
else centering done in FinalizeLine + } + + if (est->rendstatus != rendstatus_old || lines != rendlines) { + // mode_change() might reset rendstatus_old by calling SetOutFormat + int rendstatus = est->rendstatus; + emu_video_mode_change(loffs, lines, coffs, columns); rendstatus_old = rendstatus; rendlines = lines; + sprites = 0; } - DrawLineDest = (char *)DrawLineDestBase + screen_offset * DrawLineDestIncrement; + est->HighCol = HighColBase + screen_offset * HighColIncrement; + est->DrawLineDest = (char *)DrawLineDestBase + screen_offset * DrawLineDestIncrement; + + if (FinalizeLineSMS == FinalizeLine8bitSMS) { + Pico.m.dirtyPal = (Pico.m.dirtyPal || est->SonicPalCount ? 2 : 0); + memcpy(est->SonicPal, PicoMem.cram, 0x40*2); + } + est->SonicPalCount = 0; } -void PicoLineMode4(int line) +void PicoParseSATSMS(int line) { - if (skip_next_line > 0) { - skip_next_line--; + if (Pico.video.reg[0] & 0x04) ParseSpritesM4(line); + else ParseSpritesTMS(line); +} + +void PicoLineSMS(int line) +{ + int skip = skip_next_line; + unsigned bgcolor; + + // GG LCD, render only visible part of screen + if ((Pico.m.hardware & PMS_HW_LCD) && (line < 24 || line >= 24+144)) + goto norender; + + if (PicoScanBegin != NULL && skip == 0) + skip = PicoScanBegin(line + screen_offset); + + if (skip) { + skip_next_line = skip - 1; return; } - if (PicoScanBegin != NULL) - skip_next_line = PicoScanBegin(line + screen_offset); - // Draw screen: - BackFill(Pico.video.reg[7] & 0x0f, 0); - if (Pico.video.reg[1] & 0x40) - DrawDisplayM4(line); + bgcolor = (Pico.video.reg[7] & 0x0f) | ((Pico.video.reg[0] & 0x04) << 2); + BackFill(bgcolor, 0, &Pico.est); // bgcolor is from 2nd palette in mode 4 + if (Pico.video.reg[1] & 0x40) { + if (Pico.video.reg[0] & 0x04) DrawDisplayM4(line); // also M4+M3 + else if (Pico.video.reg[1] & 0x08) DrawDisplayM2(line); // also M2+M3 + else if (Pico.video.reg[1] & 0x10) DrawDisplayM1(line); // also M1+M3 + else if (Pico.video.reg[0] & 0x02) DrawDisplayM3(line); + else 
DrawDisplayM0(line); + } - if (FinalizeLineM4 != NULL) - FinalizeLineM4(line); + if (FinalizeLineSMS != NULL) + FinalizeLineSMS(line); if (PicoScanEnd != NULL) skip_next_line = PicoScanEnd(line + screen_offset); - DrawLineDest = (char *)DrawLineDest + DrawLineDestIncrement; +norender: + Pico.est.HighCol += HighColIncrement; + Pico.est.DrawLineDest = (char *)Pico.est.DrawLineDest + DrawLineDestIncrement; } -void PicoDoHighPal555M4(void) +/* Palette for TMS9918 mode, see https://www.smspower.org/Development/Palette */ +// RGB values: #000000 #000000 #21c842 #5edc78 #5455ed #7d76fc #d4524d #42ebf5 +// #fc5554 #ff7978 #d4c154 #e6ce80 #21b03b #c95bba #cccccc #ffffff +static u16 tmspal[] = { + // SMS palette + 0x0000, 0x0000, 0x00a0, 0x00f0, 0x0a00, 0x0f00, 0x0005, 0x0ff0, + 0x000a, 0x000f, 0x00aa, 0x00ff, 0x0050, 0x0f0f, 0x0aaa, 0x0fff, + // TMS palette + 0x0000, 0x0000, 0x04c2, 0x07d6, 0x0e55, 0x0f77, 0x055c, 0x0ee4, + 0x055f, 0x077f, 0x05bc, 0x08ce, 0x03a2, 0x0b5c, 0x0ccc, 0x0fff, + // SMS palette, closer to the TMS one + 0x0000, 0x0000, 0x05f0, 0x05f5, 0x0a50, 0x0f55, 0x055a, 0x0ff0, + 0x055f, 0x0aaf, 0x05aa, 0x05af, 0x00a0, 0x0f5f, 0x0aaa, 0x0fff, +}; + +void PicoDoHighPal555SMS(void) { - unsigned int *spal=(void *)Pico.cram; - unsigned int *dpal=(void *)HighPal; + u32 *spal = (void *)Pico.est.SonicPal; + u32 *dpal = (void *)Pico.est.HighPal; + unsigned int cnt = Pico.est.SonicPalCount+1; unsigned int t; - int i; + int i, j; + + if (FinalizeLineSMS == FinalizeLineRGB555SMS || Pico.m.dirtyPal == 2) + Pico.m.dirtyPal = 0; - Pico.m.dirtyPal = 0; + // use hardware palette if not in 8bit accurate mode + if (FinalizeLineSMS != FinalizeLine8bitSMS) + spal = (void *)PicoMem.cram; - /* cram is always stored as shorts, even though real hardware probably uses bytes */ - for (i = 0x20/2; i > 0; i--, spal++, dpal++) { - t = *spal; -#ifdef USE_BGR555 - t = ((t & 0x00030003)<< 3) | ((t & 0x000c000c)<<7) | ((t & 0x00300030)<<10); + /* SMS 6 bit cram data was already converted to 
MD/GG format by vdp write, + * hence GG/SMS/TMS can all be handled the same here */ + for (j = cnt; j > 0; j--) { + if (!(Pico.video.reg[0] & 0x4)) // fixed palette in TMS modes + spal = (u32 *)tmspal + (Pico.m.hardware & PMS_HW_TMS ? 16/2:0); + for (i = 0x20/2; i > 0; i--, spal++, dpal++) { + t = *spal; +#if defined(USE_BGR555) + t = ((t & 0x000f000f)<<1) | ((t & 0x00f000f0)<<2) | ((t & 0x0f000f00)<<3); + t |= (t >> 4) & 0x04210421; +#elif defined(USE_BGR565) + t = ((t & 0x000f000f)<<1) | ((t & 0x00f000f0)<<3) | ((t & 0x0f000f00)<<4); + t |= (t >> 4) & 0x08610861; #else - t = ((t & 0x00030003)<<14) | ((t & 0x000c000c)<<7) | ((t & 0x00300030)>>1); + t = ((t & 0x000f000f)<<12)| ((t & 0x00f000f0)<<3) | ((t & 0x0f000f00)>>7); + t |= (t >> 4) & 0x08610861; #endif - t |= t >> 2; - t |= (t >> 4) & 0x08610861; - *dpal = t; + *dpal = t; + } + memcpy(dpal, dpal-0x20/2, 0x20*2); // for prio bit + spal += 0x20/2, dpal += 0x20/2; } - HighPal[0xe0] = 0; + Pico.est.HighPal[0xe0] = 0; } -static void FinalizeLineRGB555M4(int line) +static void FinalizeLineRGB555SMS(int line) { if (Pico.m.dirtyPal) - PicoDoHighPal555M4(); + PicoDoHighPal555SMS(); // standard FinalizeLine can finish it for us, // with features like scaling and such - FinalizeLine555(0, line); + FinalizeLine555(0, line, &Pico.est); } -static void FinalizeLine8bitM4(int line) +static void FinalizeLine8bitSMS(int line) { - unsigned char *pd = DrawLineDest; - - if (!(PicoOpt & POPT_DIS_32C_BORDER)) - pd += 32; - - memcpy32((int *)pd, (int *)(HighCol+8), 256/4); + FinalizeLine8bit(0, line, &Pico.est); } -void PicoDrawSetOutputMode4(pdso_t which) +void PicoDrawSetOutputSMS(pdso_t which) { switch (which) { - case PDF_8BIT: FinalizeLineM4 = FinalizeLine8bitM4; break; - case PDF_RGB555: FinalizeLineM4 = FinalizeLineRGB555M4; break; - default: FinalizeLineM4 = NULL; break; + case PDF_8BIT: FinalizeLineSMS = FinalizeLine8bitSMS; break; + case PDF_RGB555: FinalizeLineSMS = FinalizeLineRGB555SMS; break; + default: 
FinalizeLineSMS = NULL; // no multiple palettes, no scaling + PicoDrawSetInternalBuf(Pico.est.Draw2FB, 328); break; } + rendstatus_old = -1; + mode = -1; } +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/patch.c b/pico/patch.c index b961e802..cd4620d7 100644 --- a/pico/patch.c +++ b/pico/patch.c @@ -22,18 +22,20 @@ */ #include "pico_int.h" +#include "memory.h" #include "patch.h" struct patch { - unsigned int addr; - unsigned short data; + unsigned int addr; + unsigned short data; + unsigned char comp; }; struct patch_inst *PicoPatches = NULL; int PicoPatchCount = 0; -static char genie_chars[] = "AaBbCcDdEeFfGgHhJjKkLlMmNnPpRrSsTtVvWwXxYyZz0O1I2233445566778899"; +static char genie_chars_md[] = "AaBbCcDdEeFfGgHhJjKkLlMmNnPpRrSsTtVvWwXxYyZz0O1I2233445566778899"; /* genie_decode * This function converts a Game Genie code to an address:data pair. @@ -47,20 +49,23 @@ static char genie_chars[] = "AaBbCcDdEeFfGgHhJjKkLlMmNnPpRrSsTtVvWwXxYyZz0O1I223 * by result. If an error results, both the address and data will be set to -1. 
*/ -static void genie_decode(const char* code, struct patch* result) +static void genie_decode_md(const char* code, struct patch* result) { int i = 0, n; char* x; - for(; i < 8; ++i) + for(; i < 9; ++i) { + /* Skip i=4; it's going to be the separating hyphen */ + if (i==4) continue; + /* If strchr returns NULL, we were given a bad character */ - if(!(x = strchr(genie_chars, code[i]))) + if(!(x = strchr(genie_chars_md, code[i]))) { result->addr = -1; result->data = -1; return; } - n = (x - genie_chars) >> 1; + n = (x - genie_chars_md) >> 1; /* Now, based on which character this is, fit it into the result */ switch(i) { @@ -81,21 +86,21 @@ static void genie_decode(const char* code, struct patch* result) /* BCDE ____ ____ ___A ____ ____ : ____ ____ ____ ____ */ result->addr |= (n & 0xF) << 20 | (n >> 4) << 8; break; - case 4: + case 5: /* ____ ABCD ____ ____ ____ ____ : ___E ____ ____ ____ */ result->data |= (n & 1) << 12; result->addr |= (n >> 1) << 16; break; - case 5: + case 6: /* ____ ____ ____ ____ ____ ____ : E___ ABCD ____ ____ */ result->data |= (n & 1) << 15 | (n >> 1) << 8; break; - case 6: + case 7: /* ____ ____ ____ ____ CDE_ ____ : _AB_ ____ ____ ____ */ result->data |= (n >> 3) << 13; result->addr |= (n & 7) << 5; break; - case 7: + case 8: /* ____ ____ ____ ____ ___A BCDE : ____ ____ ____ ____ */ result->addr |= n; break; @@ -113,223 +118,400 @@ static void genie_decode(const char* code, struct patch* result) static char hex_chars[] = "00112233445566778899AaBbCcDdEeFf"; -static void hex_decode(const char *code, struct patch *result) +static void hex_decode_md(const char *code, struct patch *result) { char *x; int i; /* 6 digits for address */ for(i = 0; i < 6; ++i) + { + if(!(x = strchr(hex_chars, code[i]))) { - if(!(x = strchr(hex_chars, code[i]))) - { - result->addr = result->data = -1; - return; - } - result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + result->addr = result->data = -1; + return; } + result->addr = (result->addr << 4) | ((x 
- hex_chars) >> 1); + } /* 4 digits for data */ - for(i = 6; i < 10; ++i) + for(i = 7; i < 11; ++i) + { + if(!(x = strchr(hex_chars, code[i]))) { + if (i==8) break; + result->addr = result->data = -1; + return; + } + result->data = (result->data << 4) | ((x - hex_chars) >> 1); + } +} + +void genie_decode_ms(const char *code, struct patch *result) +{ + char *x; + int i; + /* 2 digits for data */ + for(i=0;i<2;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->data = (result->data << 4) | ((x - hex_chars) >> 1); + } + /* 4 digits for address */ + for(i=2;i<7;++i) + { + /* 4th character is hyphen and can be skipped*/ + if (i==3) continue; + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + } + /* Correct the address */ + result->addr = ((result->addr >> 4) | (result->addr << 12 & 0xF000)) ^ 0xF000; + /* Optional: 3 digits for comp */ + if (code[7]=='-') + { + for(i=8;i<11;++i) + { + if (i==9) continue; /* 2nd character is ignored */ if(!(x = strchr(hex_chars, code[i]))) { - result->addr = result->data = -1; - return; + result->addr = result->data = -1; + return; } - result->data = (result->data << 4) | ((x - hex_chars) >> 1); + result->comp = (result->comp << 4) | ((x - hex_chars) >> 1); } + /* Correct the comp */ + result->comp = ((result->comp >> 2) | ((result->comp << 6) & 0xC0)) ^ 0xBA; + } +} + +void ar_decode_ms(const char *code, struct patch *result){ + char *x; + int i; + /* 2 digits of padding*/ + /* 4 digits for address */ + for(i=2;i<7;++i) + { + /* 5th character is hyphen and can be skipped*/ + if (i==4) continue; + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + } + /* 2 digits for data */ + for(i=7;i<9;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = 
result->data = -1; + return; + } + result->data = (result->data << 4) | ((x - hex_chars) >> 1); + } +} + +void fusion_ram_decode(const char *code, struct patch *result){ + char *x; + int i; + /* 4 digits for address */ + for(i=0;i<4;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + } + /* Skip the ':' */ + /* 2 digits for data */ + for(i=5;i<7;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->data = (result->data << 4) | ((x - hex_chars) >> 1); + } +} + +void fusion_rom_decode(const char *code, struct patch *result){ + char *x; + int i; + /* 2 digits for comp */ + for(i=0;i<2;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->comp = (result->comp << 4) | ((x - hex_chars) >> 1); + } + /* 4 digits for address */ + for(i=2;i<6;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->addr = (result->addr << 4) | ((x - hex_chars) >> 1); + } + /* 2 digits for data */ + for(i=7;i<9;++i) + { + if(!(x = strchr(hex_chars, code[i]))) + { + result->addr = result->data = -1; + return; + } + result->data = (result->data << 4) | ((x - hex_chars) >> 1); + } } /* THIS is the function you call from the MegaDrive or whatever. This figures * out whether it's a genie or hex code, depunctuates it, and calls the proper * decoder. 
*/ -static void decode(const char* code, struct patch* result) +void decode(const char* code, struct patch* result) { - int len = strlen(code), i, j; - char code_to_pass[16], *x; - const char *ad, *da; - int adl, dal; + int len = strlen(code); /* Initialize the result */ - result->addr = result->data = 0; + result->addr = result->data = result->comp = 0; - /* Just assume 8 char long string to be Game Genie code */ - if (len == 8) + if(!(PicoIn.AHW & PAHW_SMS)) { - genie_decode(code, result); - return; - } + //If Genesis - /* If it's 9 chars long and the 5th is a hyphen, we have a Game Genie - * code. */ + //Game Genie if(len == 9 && code[4] == '-') { - /* Remove the hyphen and pass to genie_decode */ - code_to_pass[0] = code[0]; - code_to_pass[1] = code[1]; - code_to_pass[2] = code[2]; - code_to_pass[3] = code[3]; - code_to_pass[4] = code[5]; - code_to_pass[5] = code[6]; - code_to_pass[6] = code[7]; - code_to_pass[7] = code[8]; - code_to_pass[8] = '\0'; - genie_decode(code_to_pass, result); + genie_decode_md(code, result); return; } - /* Otherwise, we assume it's a hex code. - * Find the colon so we know where address ends and data starts. If there's - * no colon, then we haven't a code at all! 
*/ - if(!(x = strchr(code, ':'))) goto bad_code; - ad = code; da = x + 1; adl = x - code; dal = len - adl - 1; + //Master + else if(len >=9 && code[6] == ':') + { + hex_decode_md(code, result); + } - /* If a section is empty or too long, toss it */ - if(adl == 0 || adl > 6 || dal == 0 || dal > 4) goto bad_code; + else + { + goto bad_code; + } + } else { + //If Master System - /* Pad the address with zeros, then fill it with the value */ - for(i = 0; i < (6 - adl); ++i) code_to_pass[i] = '0'; - for(j = 0; i < 6; ++i, ++j) code_to_pass[i] = ad[j]; + //Genie + if(len >= 7 && code[3] == '-') + { + genie_decode_ms(code, result); + } - /* Do the same for data */ - for(i = 6; i < (10 - dal); ++i) code_to_pass[i] = '0'; - for(j = 0; i < 10; ++i, ++j) code_to_pass[i] = da[j]; + //AR + else if(len == 9 && code[4] == '-') + { + ar_decode_ms(code, result); + } - code_to_pass[10] = '\0'; + //Fusion RAM + else if(len == 7 && code[4] == ':') + { + fusion_ram_decode(code, result); + } + + //Fusion ROM + else if(len == 9 && code[6] == ':') + { + fusion_rom_decode(code, result); + } + + else + { + goto bad_code; + } + + //Convert RAM address space to Genesis location. + if (result->addr>=0xC000) + result->addr= 0xFF0000 | (0x1FFF & result->addr); + } - /* Decode and goodbye */ - hex_decode(code_to_pass, result); return; -bad_code: - - /* AGH! Invalid code! 
*/ + bad_code: result->data = result->addr = -1; return; } - - -unsigned int PicoRead16(unsigned int a); -void PicoWrite16(unsigned int a, unsigned short d); - - void PicoPatchUnload(void) { - if (PicoPatches != NULL) - { - free(PicoPatches); - PicoPatches = NULL; - } - PicoPatchCount = 0; + if (PicoPatches != NULL) + { + free(PicoPatches); + PicoPatches = NULL; + } + PicoPatchCount = 0; } int PicoPatchLoad(const char *fname) { - FILE *f; - char buff[256]; - struct patch pt; - int array_len = 0; + FILE *f; + char buff[256]; + struct patch pt; + int array_len = 0; - PicoPatchUnload(); + PicoPatchUnload(); - f = fopen(fname, "r"); - if (f == NULL) - { - return -1; - } + f = fopen(fname, "r"); + if (f == NULL) + { + return -1; + } - while (fgets(buff, sizeof(buff), f)) - { - int llen, clen; + while (fgets(buff, sizeof(buff), f)) + { + int llen, clen; - llen = strlen(buff); - for (clen = 0; clen < llen; clen++) - if (isspace_(buff[clen])) - break; - buff[clen] = 0; + llen = strlen(buff); + for (clen = 0; clen < llen; clen++) + if (isspace_(buff[clen])) + break; + buff[clen] = 0; - if (clen > 11 || clen < 8) - continue; + if (clen > 11 || clen < 8) + continue; - decode(buff, &pt); - if (pt.addr == (unsigned int)-1 || pt.data == (unsigned short)-1) - continue; + decode(buff, &pt); + if (pt.addr == (unsigned int)-1 || pt.data == (unsigned short)-1) + continue; - /* code was good, add it */ - if (array_len < PicoPatchCount + 1) - { - void *ptr; - array_len *= 2; - array_len++; - ptr = realloc(PicoPatches, array_len * sizeof(PicoPatches[0])); - if (ptr == NULL) break; - PicoPatches = ptr; - } - strcpy(PicoPatches[PicoPatchCount].code, buff); - /* strip */ - for (clen++; clen < llen; clen++) - if (!isspace_(buff[clen])) - break; - for (llen--; llen > 0; llen--) - if (!isspace_(buff[llen])) - break; - buff[llen+1] = 0; - strncpy(PicoPatches[PicoPatchCount].name, buff + clen, 51); - PicoPatches[PicoPatchCount].name[51] = 0; - PicoPatches[PicoPatchCount].active = 0; - 
PicoPatches[PicoPatchCount].addr = pt.addr; - PicoPatches[PicoPatchCount].data = pt.data; - PicoPatches[PicoPatchCount].data_old = 0; - PicoPatchCount++; - // fprintf(stderr, "loaded patch #%i: %06x:%04x \"%s\"\n", PicoPatchCount-1, pt.addr, pt.data, - // PicoPatches[PicoPatchCount-1].name); - } - fclose(f); + /* code was good, add it */ + if (array_len < PicoPatchCount + 1) + { + void *ptr; + array_len *= 2; + array_len++; + ptr = realloc(PicoPatches, array_len * sizeof(PicoPatches[0])); + if (ptr == NULL) break; + PicoPatches = ptr; + } + strcpy(PicoPatches[PicoPatchCount].code, buff); + /* strip */ + for (clen++; clen < llen; clen++) + if (!isspace_(buff[clen])) + break; + for (llen--; llen > 0; llen--) + if (!isspace_(buff[llen])) + break; + buff[llen+1] = 0; + strncpy(PicoPatches[PicoPatchCount].name, buff + clen, 51); + PicoPatches[PicoPatchCount].name[51] = 0; + PicoPatches[PicoPatchCount].active = 0; + PicoPatches[PicoPatchCount].addr = pt.addr; + PicoPatches[PicoPatchCount].data = pt.data; + PicoPatches[PicoPatchCount].data_old = 0; + PicoPatchCount++; + // fprintf(stderr, "loaded patch #%i: %06x:%04x \"%s\"\n", PicoPatchCount-1, pt.addr, pt.data, + // PicoPatches[PicoPatchCount-1].name); + } + fclose(f); - return 0; + return 0; } /* to be called when the Rom is loaded and byteswapped */ void PicoPatchPrepare(void) { - int i; + int i; + int addr; - for (i = 0; i < PicoPatchCount; i++) - { - PicoPatches[i].addr &= ~1; - if (PicoPatches[i].addr < Pico.romsize) - PicoPatches[i].data_old = *(unsigned short *)(Pico.rom + PicoPatches[i].addr); - if (strstr(PicoPatches[i].name, "AUTO")) - PicoPatches[i].active = 1; - } + for (i = 0; i < PicoPatchCount; i++) + { + addr=PicoPatches[i].addr; + addr &= ~1; + if (addr < Pico.romsize) + PicoPatches[i].data_old = *(u16 *)(Pico.rom + addr); + else + { + if(!(PicoIn.AHW & PAHW_SMS)) + PicoPatches[i].data_old = (u16) m68k_read16(addr); + else + ;// wrong: PicoPatches[i].data_old = (unsigned char) PicoRead8_z80(addr); + } + 
if (strstr(PicoPatches[i].name, "AUTO")) + PicoPatches[i].active = 1; + } } void PicoPatchApply(void) { - int i, u; - unsigned int addr; + int i, u; + unsigned int addr; - for (i = 0; i < PicoPatchCount; i++) - { - addr = PicoPatches[i].addr; - if (addr < Pico.romsize) - { - if (PicoPatches[i].active) - *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data; - else { - // if current addr is not patched by older patch, write back original val - for (u = 0; u < i; u++) - if (PicoPatches[u].addr == addr) break; - if (u == i) - *(unsigned short *)(Pico.rom + addr) = PicoPatches[i].data_old; - } - // fprintf(stderr, "patched %i: %06x:%04x\n", PicoPatches[i].active, addr, - // *(unsigned short *)(Pico.rom + addr)); - } - else - { - /* TODO? */ - } - } + for (i = 0; i < PicoPatchCount; i++) + { + addr = PicoPatches[i].addr; + + if (addr < Pico.romsize) + { + if (PicoPatches[i].active) + { + if (!(PicoIn.AHW & PAHW_SMS)) + *(u16 *)(Pico.rom + addr) = PicoPatches[i].data; + else if (!PicoPatches[i].comp || PicoPatches[i].comp == *(char *)(Pico.rom + addr)) + *(char *)(Pico.rom + addr) = (char) PicoPatches[i].data; + } + else + { + // if current addr is not patched by older patch, write back original val + for (u = 0; u < i; u++) + if (PicoPatches[u].addr == addr) break; + if (u == i) + { + if (!(PicoIn.AHW & PAHW_SMS)) + *(u16 *)(Pico.rom + addr) = PicoPatches[i].data_old; + else + *(char *)(Pico.rom + addr) = (char) PicoPatches[i].data_old; + } + } + // fprintf(stderr, "patched %i: %06x:%04x\n", PicoPatches[i].active, addr, + // *(u16 *)(Pico.rom + addr)); + } + else + { + if (PicoPatches[i].active) + { + if (!(PicoIn.AHW & PAHW_SMS)) + m68k_write16(addr,PicoPatches[i].data); + else + ;// wrong: PicoWrite8_z80(addr,PicoPatches[i].data); + } + else + { + // if current addr is not patched by older patch, write back original val + for (u = 0; u < i; u++) + if (PicoPatches[u].addr == addr) break; + if (u == i) + { + if (!(PicoIn.AHW & PAHW_SMS)) + 
m68k_write16(PicoPatches[i].addr,PicoPatches[i].data_old); + else + ;// wrong: PicoWrite8_z80(PicoPatches[i].addr,PicoPatches[i].data_old); + } + } + } + } } diff --git a/pico/patch.h b/pico/patch.h index 6e9420f8..9ab86d36 100644 --- a/pico/patch.h +++ b/pico/patch.h @@ -13,6 +13,7 @@ struct patch_inst unsigned int addr; unsigned short data; unsigned short data_old; + unsigned char comp; }; extern struct patch_inst *PicoPatches; diff --git a/pico/pico.c b/pico/pico.c index 6888080b..6d061c2a 100644 --- a/pico/pico.c +++ b/pico/pico.c @@ -2,6 +2,7 @@ * PicoDrive * (c) Copyright Dave, 2004 * (C) notaz, 2006-2010 + * (C) irixxxx, 2020-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -11,20 +12,9 @@ #include "sound/ym2612.h" struct Pico Pico; -int PicoOpt; -int PicoSkipFrame; // skip rendering frame? -int PicoPad[2]; // Joypads, format is MXYZ SACB RLDU -int PicoPadInt[2]; // internal copy -int PicoAHW; // active addon hardware: PAHW_* -int PicoQuirks; // game-specific quirks -int PicoRegionOverride; // override the region detection 0: Auto, 1: Japan NTSC, 2: Japan PAL, 4: US, 8: Europe -int PicoAutoRgnOrder; +struct PicoMem PicoMem; +PicoInterface PicoIn; -struct PicoSRAM SRam; -int emustatus; // rapid_ym2612, multi_ym_updates -int scanlines_total; - -void (*PicoWriteSound)(int len) = NULL; // called at the best time to send sound buffer (PsndOut) to hardware void (*PicoResetHook)(void) = NULL; void (*PicoLineHook)(void) = NULL; @@ -33,8 +23,14 @@ void PicoInit(void) { // Blank space for state: memset(&Pico,0,sizeof(Pico)); - memset(&PicoPad,0,sizeof(PicoPad)); - memset(&PicoPadInt,0,sizeof(PicoPadInt)); + memset(&PicoMem,0,sizeof(PicoMem)); + memset(&PicoIn.pad,0,sizeof(PicoIn.pad)); + memset(&PicoIn.padInt,0,sizeof(PicoIn.padInt)); + + Pico.est.Pico = &Pico; + Pico.est.PicoMem_vram = PicoMem.vram; + Pico.est.PicoMem_cram = PicoMem.cram; + Pico.est.PicoOpt = &PicoIn.opt; // Init CPUs: SekInit(); @@ 
-43,65 +39,80 @@ void PicoInit(void) PicoInitMCD(); PicoSVPInit(); Pico32xInit(); + PsndInit(); + + PicoVideoInit(); + PicoDrawInit(); + PicoDraw2Init(); } // to be called once on emu exit void PicoExit(void) { - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) PicoExitMCD(); PicoCartUnload(); z80_exit(); + PsndExit(); - if (SRam.data) - free(SRam.data); + free(Pico.sv.data); + Pico.sv.data = NULL; + Pico.sv.start = Pico.sv.end = 0; pevt_dump(); } void PicoPower(void) { Pico.m.frame_count = 0; - SekCycleCnt = SekCycleAim = 0; + Pico.t.m68c_cnt = Pico.t.m68c_aim = 0; // clear all memory of the emulated machine - memset(&Pico.ram,0,(unsigned char *)&Pico.rom - Pico.ram); + memset(&PicoMem,0,sizeof(PicoMem)); memset(&Pico.video,0,sizeof(Pico.video)); memset(&Pico.m,0,sizeof(Pico.m)); - - Pico.video.pending_ints=0; - z80_reset(); + memset(&Pico.t,0,sizeof(Pico.t)); // my MD1 VA6 console has this in IO - Pico.ioports[1] = Pico.ioports[2] = Pico.ioports[3] = 0xff; + PicoMem.ioports[1] = PicoMem.ioports[2] = PicoMem.ioports[3] = 0xff; - // default VDP register values (based on Fusion) - Pico.video.reg[0] = Pico.video.reg[1] = 0x04; - Pico.video.reg[0xc] = 0x81; - Pico.video.reg[0xf] = 0x02; + Pico.video.hint_irq = (PicoIn.AHW & PAHW_PICO ? 
5 : 4); - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) PicoPowerMCD(); - if (PicoOpt & POPT_EN_32X) + if (PicoIn.opt & POPT_EN_32X) PicoPower32x(); PicoReset(); + + // powerup default VDP register values from TMSS BIOS + Pico.video.reg[0] = Pico.video.reg[1] = 0x04; + Pico.video.reg[0xc] = 0x81; + Pico.video.reg[0xf] = 0x02; + SATaddr = 0x0000; + SATmask = ~0x3ff; } PICO_INTERNAL void PicoDetectRegion(void) { int support=0, hw=0, i; unsigned char pal=0; + char *pr = (char *)(Pico.rom + 0x1f0); - if (PicoRegionOverride) + if (PicoIn.regionOverride) { - support = PicoRegionOverride; + support = PicoIn.regionOverride; + } + else if (strcmp(pr, "EUROPE") == 0 || strcmp(pr, "Europe") == 0) + { + // Unusual cartridge region 'code' + support|=8; } else { // Read cartridge region data: - unsigned short *rd = (unsigned short *)(Pico.rom + 0x1f0); + unsigned short *rd = (unsigned short *)pr; int region = (rd[0] << 16) | rd[1]; for (i = 0; i < 4; i++) @@ -129,10 +140,10 @@ PICO_INTERNAL void PicoDetectRegion(void) } // auto detection order override - if (PicoAutoRgnOrder) { - if (((PicoAutoRgnOrder>>0)&0xf) & support) support = (PicoAutoRgnOrder>>0)&0xf; - else if (((PicoAutoRgnOrder>>4)&0xf) & support) support = (PicoAutoRgnOrder>>4)&0xf; - else if (((PicoAutoRgnOrder>>8)&0xf) & support) support = (PicoAutoRgnOrder>>8)&0xf; + if (PicoIn.autoRgnOrder) { + if (((PicoIn.autoRgnOrder>>0)&0xf) & support) support = (PicoIn.autoRgnOrder>>0)&0xf; + else if (((PicoIn.autoRgnOrder>>4)&0xf) & support) support = (PicoIn.autoRgnOrder>>4)&0xf; + else if (((PicoIn.autoRgnOrder>>8)&0xf) & support) support = (PicoIn.autoRgnOrder>>8)&0xf; } // Try to pick the best hardware value for English/50hz: @@ -142,7 +153,9 @@ PICO_INTERNAL void PicoDetectRegion(void) else if (support&1) hw=0x00; // Japan NTSC else hw=0x80; // USA - Pico.m.hardware=(unsigned char)(hw|0x20); // No disk attached + if (!(PicoIn.AHW & PAHW_MCD)) hw |= 0x20; // No disk attached + + Pico.m.hardware=(unsigned char)hw; 
Pico.m.pal=pal; } @@ -152,17 +165,17 @@ int PicoReset(void) return 1; #if defined(CPU_CMP_R) || defined(CPU_CMP_W) || defined(DRC_CMP) - PicoOpt |= POPT_DIS_VDP_FIFO|POPT_DIS_IDLE_DET; + PicoIn.opt |= POPT_DIS_VDP_FIFO|POPT_DIS_IDLE_DET; #endif /* must call now, so that banking is reset, and correct vectors get fetched */ if (PicoResetHook) PicoResetHook(); - memset(&PicoPadInt,0,sizeof(PicoPadInt)); - emustatus = 0; + memset(&PicoIn.padInt, 0, sizeof(PicoIn.padInt)); - if (PicoAHW & PAHW_SMS) { + z80_reset(); + if (PicoIn.AHW & PAHW_SMS) { PicoResetMS(); return 0; } @@ -171,44 +184,43 @@ int PicoReset(void) // ..but do not reset SekCycle* to not desync with addons // s68k doesn't have the TAS quirk, so we just globally set normal TAS handler in MCD mode (used by Batman games). - SekSetRealTAS(PicoAHW & PAHW_MCD); - - Pico.m.dirtyPal = 1; + SekSetRealTAS(PicoIn.AHW & PAHW_MCD); Pico.m.z80_bank68k = 0; Pico.m.z80_reset = 1; PicoDetectRegion(); - Pico.video.status = 0x3428 | Pico.m.pal; // 'always set' bits | vblank | collision | pal + + PicoVideoReset(); PsndReset(); // pal must be known here // create an empty "dma" to cause 68k exec start at random frame location - if (Pico.m.dma_xfers == 0 && !(PicoOpt & POPT_DIS_VDP_FIFO)) - Pico.m.dma_xfers = rand() & 0x1fff; + Pico.t.m68c_line_start = Pico.t.m68c_aim; + PicoVideoFIFOWrite(rand() & 0x1fff, 0, 0, PVS_CPURD); SekFinishIdleDet(); - if (PicoAHW & PAHW_MCD) { + if (PicoIn.opt & POPT_EN_32X) + PicoReset32x(); + + if (PicoIn.AHW & PAHW_MCD) { PicoResetMCD(); return 0; } // reinit, so that checksum checks pass - if (!(PicoOpt & POPT_DIS_IDLE_DET)) + if (!(PicoIn.opt & POPT_DIS_IDLE_DET)) SekInitIdleDet(); - if (PicoOpt & POPT_EN_32X) - PicoReset32x(); - // reset sram state; enable sram access by default if it doesn't overlap with ROM Pico.m.sram_reg = 0; - if ((SRam.flags & SRF_EEPROM) || Pico.romsize <= SRam.start) + if ((Pico.sv.flags & SRF_EEPROM) || Pico.romsize <= Pico.sv.start) Pico.m.sram_reg |= SRR_MAPPED; - if 
(SRam.flags & SRF_ENABLED) - elprintf(EL_STATUS, "sram: %06x - %06x; eeprom: %i", SRam.start, SRam.end, - !!(SRam.flags & SRF_EEPROM)); + if (Pico.sv.flags & SRF_ENABLED) + elprintf(EL_STATUS, "sram: %06x - %06x; eeprom: %i", Pico.sv.start, Pico.sv.end, + !!(Pico.sv.flags & SRF_EEPROM)); return 0; } @@ -216,88 +228,49 @@ int PicoReset(void) // flush config changes before emu loop starts void PicoLoopPrepare(void) { - if (PicoRegionOverride) + if (PicoIn.regionOverride) // force setting possibly changed.. - Pico.m.pal = (PicoRegionOverride == 2 || PicoRegionOverride == 8) ? 1 : 0; + Pico.m.pal = (PicoIn.regionOverride == 2 || PicoIn.regionOverride == 8) ? 1 : 0; - // FIXME: PAL has 313 scanlines.. - scanlines_total = Pico.m.pal ? 312 : 262; + if (Pico.m.pal) { + Pico.t.vcnt_wrap = 0x103; + Pico.t.vcnt_adj = 57; + } + else { + Pico.t.vcnt_wrap = 0xEB; + Pico.t.vcnt_adj = 6; + } + PicoVideoFIFOMode(Pico.video.reg[1]&0x40, Pico.video.reg[12]&1); Pico.m.dirtyPal = 1; rendstatus_old = -1; -} - -// dma2vram settings are just hacks to unglitch Legend of Galahad (needs <= 104 to work) -// same for Outrunners (92-121, when active is set to 24) -// 96 is VR hack -static const int dma_timings[] = { - 167, 167, 166, 83, // vblank: 32cell: dma2vram dma2[vs|c]ram vram_fill vram_copy - 102, 205, 204, 102, // vblank: 40cell: - 16, 16, 15, 8, // active: 32cell: - 24, 18, 17, 9 // ... -}; - -static const int dma_bsycles[] = { - (488<<8)/167, (488<<8)/167, (488<<8)/166, (488<<8)/83, - (488<<8)/102, (488<<8)/233, (488<<8)/204, (488<<8)/102, - (488<<8)/16, (488<<8)/16, (488<<8)/15, (488<<8)/8, - (488<<8)/24, (488<<8)/18, (488<<8)/17, (488<<8)/9 -}; - -// grossly inaccurate.. FIXME FIXXXMEE -PICO_INTERNAL int CheckDMA(void) -{ - int burn = 0, xfers_can, dma_op = Pico.video.reg[0x17]>>6; // see gens for 00 and 01 modes - int xfers = Pico.m.dma_xfers; - int dma_op1; - - if(!(dma_op&2)) dma_op = (Pico.video.type==1) ? 
0 : 1; // setting dma_timings offset here according to Gens - dma_op1 = dma_op; - if(Pico.video.reg[12] & 1) dma_op |= 4; // 40 cell mode? - if(!(Pico.video.status&8)&&(Pico.video.reg[1]&0x40)) dma_op|=8; // active display? - xfers_can = dma_timings[dma_op]; - if(xfers <= xfers_can) - { - if(dma_op&2) Pico.video.status&=~2; // dma no longer busy - else { - burn = xfers * dma_bsycles[dma_op] >> 8; // have to be approximate because can't afford division.. - } - Pico.m.dma_xfers = 0; - } else { - if(!(dma_op&2)) burn = 488; - Pico.m.dma_xfers -= xfers_can; - } - - elprintf(EL_VDPDMA, "~Dma %i op=%i can=%i burn=%i [%i]", Pico.m.dma_xfers, dma_op1, xfers_can, burn, SekCyclesDone()); - //dprintf("~aim: %i, cnt: %i", SekCycleAim, SekCycleCnt); - return burn; + if (PicoIn.AHW & PAHW_MCD) + PicoMCDPrepare(); + if (PicoIn.AHW & PAHW_32X) + Pico32xPrepare(); } #include "pico_cmn.c" -unsigned int last_z80_sync; /* in 68k cycles */ -int z80_cycle_cnt; -int z80_cycle_aim; -int z80_scanline; -int z80_scanline_cycles; /* cycles done until z80_scanline */ - /* sync z80 to 68k */ PICO_INTERNAL void PicoSyncZ80(unsigned int m68k_cycles_done) { + int m68k_cnt; int cnt; - z80_cycle_aim += cycles_68k_to_z80(m68k_cycles_done - last_z80_sync); - cnt = z80_cycle_aim - z80_cycle_cnt; - last_z80_sync = m68k_cycles_done; + + m68k_cnt = m68k_cycles_done - Pico.t.m68c_frame_start; + Pico.t.z80c_aim = cycles_68k_to_z80(m68k_cnt); + cnt = Pico.t.z80c_aim - Pico.t.z80c_cnt; pprof_start(z80); elprintf(EL_BUSREQ, "z80 sync %i (%u|%u -> %u|%u)", cnt, - z80_cycle_cnt, z80_cycle_cnt / 288, - z80_cycle_aim, z80_cycle_aim / 288); + Pico.t.z80c_cnt, Pico.t.z80c_cnt * 15 / 7 / 488, + Pico.t.z80c_aim, Pico.t.z80c_aim * 15 / 7 / 488); if (cnt > 0) - z80_cycle_cnt += z80_run(cnt); + Pico.t.z80c_cnt += z80_run(cnt); pprof_end(z80); } @@ -309,22 +282,22 @@ void PicoFrame(void) Pico.m.frame_count++; - if (PicoAHW & PAHW_SMS) { + if (PicoIn.AHW & PAHW_SMS) { PicoFrameMS(); goto end; } - if (PicoAHW & PAHW_32X) { 
+ if (PicoIn.AHW & PAHW_32X) { PicoFrame32x(); // also does MCD+32X goto end; } - if (PicoAHW & PAHW_MCD) { + if (PicoIn.AHW & PAHW_MCD) { PicoFrameMCD(); goto end; } - //if(Pico.video.reg[12]&0x2) Pico.video.status ^= 0x10; // change odd bit in interlace mode + //if(Pico.video.reg[12]&0x2) Pico.video.status ^= SR_ODD; // change odd bit in interlace mode PicoFrameStart(); PicoFrameHints(); @@ -335,9 +308,9 @@ end: void PicoFrameDrawOnly(void) { - if (!(PicoAHW & PAHW_SMS)) { + if (!(PicoIn.AHW & PAHW_SMS)) { PicoFrameStart(); - PicoDrawSync(223, 0); + PicoDrawSync(Pico.m.pal?239:223, 0, 0); } else { PicoFrameDrawOnlyMS(); } @@ -354,6 +327,4 @@ void PicoGetInternal(pint_t which, pint_ret_t *r) } } -// callback to output message from emu -void (*PicoMessage)(const char *msg)=NULL; - +// vim:ts=2:sw=2:expandtab diff --git a/pico/pico.h b/pico/pico.h index 41ba7fb0..6759f73d 100644 --- a/pico/pico.h +++ b/pico/pico.h @@ -10,6 +10,7 @@ #ifndef PICO_H #define PICO_H +#include // [u]int_t #include // size_t #ifdef __cplusplus @@ -22,7 +23,7 @@ extern void lprintf(const char *fmt, ...); // external funcs for Sega/Mega CD extern int mp3_get_bitrate(void *f, int size); extern void mp3_start_play(void *f, int pos); -extern void mp3_update(int *buffer, int length, int stereo); +extern void mp3_update(s32 *buffer, int length, int stereo); // this function should write-back d-cache and invalidate i-cache // on a mem region [start_addr, end_addr) @@ -34,10 +35,13 @@ extern void cache_flush_d_inval_i(void *start_addr, void *end_addr); extern void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed); extern void *plat_mremap(void *ptr, size_t oldsize, size_t newsize); extern void plat_munmap(void *ptr, size_t size); + +// memory for the dynarec; plat_mem_get_for_drc() can just return NULL +extern void *plat_mem_get_for_drc(size_t size); extern int plat_mem_set_exec(void *ptr, size_t size); // this one should handle display mode changes -extern void 
emu_video_mode_change(int start_line, int line_count, int is_32cols); +extern void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count); // this must switch to 16bpp mode extern void emu_32x_startup(void); @@ -52,39 +56,86 @@ extern void *p32x_bios_g, *p32x_bios_m, *p32x_bios_s; #define POPT_EN_Z80 (1<< 2) #define POPT_EN_STEREO (1<< 3) #define POPT_ALT_RENDERER (1<< 4) // 00 00x0 -// unused (1<< 5) -// unused (1<< 6) +#define POPT_EN_YM2413 (1<< 5) +#define POPT_EN_SNDFILTER (1<< 6) #define POPT_ACC_SPRITES (1<< 7) #define POPT_DIS_32C_BORDER (1<< 8) // 00 0x00 #define POPT_EXT_FM (1<< 9) #define POPT_EN_MCD_PCM (1<<10) #define POPT_EN_MCD_CDDA (1<<11) #define POPT_EN_MCD_GFX (1<<12) // 00 x000 -// unused (1<<13) +#define POPT_EN_GG_LCD (1<<13) #define POPT_EN_SOFTSCALE (1<<14) #define POPT_EN_MCD_RAMCART (1<<15) #define POPT_DIS_VDP_FIFO (1<<16) // 0x 0000 #define POPT_EN_DRC (1<<17) #define POPT_DIS_SPRITE_LIM (1<<18) #define POPT_DIS_IDLE_DET (1<<19) -#define POPT_EN_32X (1<<20) +#define POPT_EN_32X (1<<20) // x0 0000 #define POPT_EN_PWM (1<<21) -extern int PicoOpt; // bitfield +#define POPT_PWM_IRQ_OPT (1<<22) +#define POPT_DIS_FM_SSGEG (1<<23) +#define POPT_EN_FM_DAC (1<<24) //x00 0000 +#define POPT_EN_FM_FILTER (1<<25) -#define PAHW_MCD (1<<0) -#define PAHW_32X (1<<1) -#define PAHW_SVP (1<<2) -#define PAHW_PICO (1<<3) -#define PAHW_SMS (1<<4) -extern int PicoAHW; // Pico active hw +#define PAHW_MCD (1<<0) +#define PAHW_32X (1<<1) +#define PAHW_SVP (1<<2) +#define PAHW_PICO (1<<3) -#define PQUIRK_FORCE_6BTN (1<<0) -extern int PicoQuirks; +#define PAHW_SMS (1<<4) +#define PAHW_GG (1<<5) +#define PAHW_SG (1<<6) +#define PAHW_SC (1<<7) +#define PAHW_8BIT (PAHW_SMS|PAHW_GG|PAHW_SG|PAHW_SC) + +#define PHWS_AUTO 0 +#define PHWS_GG 1 +#define PHWS_SMS 2 +#define PHWS_SG 3 +#define PHWS_SC 4 + +#define PQUIRK_FORCE_6BTN (1<<0) +#define PQUIRK_BLACKTHORNE_HACK (1<<1) +#define PQUIRK_WWFRAW_HACK (1<<2) +#define PQUIRK_MARSCHECK_HACK 
(1<<3) +#define PQUIRK_NO_Z80_BUS_LOCK (1<<4) + +// the emulator is configured and some status is reported +// through this global state (not saved in savestates) +typedef struct PicoInterface +{ + unsigned int opt; // POPT_* bitfield + + unsigned short pad[4]; // Joypads, format is MXYZ SACB RLDU + unsigned short padInt[4]; // internal copy + unsigned short AHW; // active addon hardware: PAHW_* bitfield + + unsigned short skipFrame; // skip rendering frame, but still do sound (if enabled) and emulation stuff + unsigned short regionOverride; // override the region detection 0: auto, 1: Japan NTSC, 2: Japan PAL, 4: US, 8: Europe + unsigned short autoRgnOrder; // packed priority list of regions, for example 0x148 means this detection order: EUR, USA, JAP + unsigned int hwSelect; // hardware preselected via option menu + unsigned int mapper; // mapper selection for SMS, 0 = auto + unsigned int tmsPalette; // palette used by SMS in TMS graphic modes + + unsigned short quirks; // game-specific quirks: PQUIRK_* + unsigned short overclockM68k; // overclock the emulated 68k, in % + + unsigned short filter; // softscale filter type + + int sndRate; // rate in Hz + int sndFilterAlpha; // Low pass sound filter alpha (Q16) + short *sndOut; // PCM output buffer + void (*writeSound)(int len); // write .sndOut callback, called once per frame + + void (*osdMessage)(const char *msg); // output OSD message from emu, optional + + void (*mcdTrayOpen)(void); + void (*mcdTrayClose)(void); +} PicoInterface; + +extern PicoInterface PicoIn; -extern int PicoSkipFrame; // skip rendering frame, but still do sound (if enabled) and emulation stuff -extern int PicoRegionOverride; // override the region detection 0: auto, 1: Japan NTSC, 2: Japan PAL, 4: US, 8: Europe -extern int PicoAutoRgnOrder; // packed priority list of regions, for example 0x148 means this detection order: EUR, USA, JAP -extern int PicoSVPCycles; void PicoInit(void); void PicoExit(void); void PicoPower(void); @@ -92,30 
+143,21 @@ int PicoReset(void); void PicoLoopPrepare(void); void PicoFrame(void); void PicoFrameDrawOnly(void); -extern int PicoPad[2]; // Joypads, format is MXYZ SACB RLDU -extern void (*PicoWriteSound)(int bytes); // called once per frame at the best time to send sound buffer (PsndOut) to hardware -extern void (*PicoMessage)(const char *msg); // callback to output text message from emu typedef enum { PI_ROM, PI_ISPAL, PI_IS40_CELL, PI_IS240_LINES } pint_t; typedef union { int vint; void *vptr; } pint_ret_t; void PicoGetInternal(pint_t which, pint_ret_t *ret); -// cd/mcd.c -extern void (*PicoMCDopenTray)(void); -extern void (*PicoMCDcloseTray)(void); -extern int PicoCDBuffers; +struct PicoEState; // pico.c -#define XPCM_BUFFER_SIZE (320+160) +#define XPCM_BUFFER_SIZE 64 typedef struct { int pen_pos[2]; int page; - // internal int fifo_bytes; // bytes in FIFO - int fifo_bytes_prev; - int fifo_line_bytes; // float part, << 16 - int line_counter; unsigned short r1, r12; + unsigned int reserved[3]; unsigned char xpcm_buffer[XPCM_BUFFER_SIZE+4]; unsigned char *xpcm_ptr; } picohw_state; @@ -128,21 +170,17 @@ void *PicoTmpStateSave(void); void PicoTmpStateRestore(void *data); extern void (*PicoStateProgressCB)(const char *str); -// cd/buffering.c -void PicoCDBufferInit(void); -void PicoCDBufferFree(void); -void PicoCDBufferFlush(void); - -// cd/cd_sys.c -int Insert_CD(const char *cdimg_name, int type); -int Stop_CD(void); // unloads CD, returns 1 if there was cd loaded +// cd/cdd.c +int cdd_load(const char *filename, int type); +int cdd_unload(void); // Cart.c typedef enum { PMT_UNCOMPRESSED = 0, PMT_ZIP, - PMT_CSO + PMT_CSO, + PMT_CHD } pm_type; typedef struct { @@ -153,10 +191,13 @@ typedef struct char ext[4]; } pm_file; pm_file *pm_open(const char *path); +void pm_sectorsize(int length, pm_file *stream); size_t pm_read(void *ptr, size_t bytes, pm_file *stream); +size_t pm_read_audio(void *ptr, size_t bytes, pm_file *stream); int pm_seek(pm_file *stream, long offset, 
int whence); int pm_close(pm_file *fp); -int PicoCartLoad(pm_file *f,unsigned char **prom,unsigned int *psize,int is_sms); +int PicoCartLoad(pm_file *f, const unsigned char *rom, unsigned int romsize, + unsigned char **prom, unsigned int *psize, int is_sms); int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_cfg); void PicoCartUnload(void); extern void (*PicoCartLoadProgressCB)(int percent); @@ -175,32 +216,31 @@ typedef enum void PicoDrawSetOutFormat(pdso_t which, int use_32x_line_mode); void PicoDrawSetOutBuf(void *dest, int increment); void PicoDrawSetCallbacks(int (*begin)(unsigned int num), int (*end)(unsigned int num)); -extern void *DrawLineDest; -extern unsigned char *HighCol; // utility #ifdef _ASM_DRAW_C void vidConvCpyRGB565(void *to, void *from, int pixels); #endif -void PicoDoHighPal555(int sh); -extern int PicoDrawMask; -#define PDRAW_LAYERB_ON (1<<2) -#define PDRAW_LAYERA_ON (1<<3) -#define PDRAW_SPRITES_LOW_ON (1<<4) -#define PDRAW_SPRITES_HI_ON (1<<7) -#define PDRAW_32X_ON (1<<8) -// internals -#define PDRAW_SPRITES_MOVED (1<<0) // (asm) +void PicoDoHighPal555(int sh, int line, struct PicoEState *est); +// internals, NB must keep in sync with ASM draw functions #define PDRAW_WND_DIFF_PRIO (1<<1) // not all window tiles use same priority -#define PDRAW_SPR_LO_ON_HI (1<<2) // seen sprites without layer pri bit ontop spr. 
with that bit +#define PDRAW_PARSE_SPRITES (1<<2) // SAT needs parsing #define PDRAW_INTERLACE (1<<3) -#define PDRAW_DIRTY_SPRITES (1<<4) // (asm) +#define PDRAW_DIRTY_SPRITES (1<<4) // SAT modified #define PDRAW_SONIC_MODE (1<<5) // mid-frame palette changes for 8bit renderer #define PDRAW_PLANE_HI_PRIO (1<<6) // have layer with all hi prio tiles (mk3) #define PDRAW_SHHI_DONE (1<<7) // layer sh/hi already processed -#define PDRAW_32_COLS (1<<8) // 32 column mode -extern int rendstatus, rendstatus_old; +#define PDRAW_32_COLS (1<<8) // 32 columns mode +#define PDRAW_BORDER_32 (1<<9) // center H32 in buffer (32 px border) +#define PDRAW_SKIP_FRAME (1<<10) // frame is skipped +#define PDRAW_30_ROWS (1<<11) // 30 rows mode (240 lines) +#define PDRAW_32X_SCALE (1<<12) // scale CLUT layer for 32X +#define PDRAW_SMS_BLANK_1 (1<<13) // 1st column blanked +#define PDRAW_BGC_DMA (1<<14) // in background color DMA +#define PDRAW_SOFTSCALE (1<<15) // H32 upscaling +#define PDRAW_SYNC_NEEDED (1<<16) // redraw needed +#define PDRAW_SYNC_NEXT (1<<17) // redraw next frame +extern int rendstatus_old; extern int rendlines; -extern unsigned short HighPal[0x100]; // draw.c void PicoDrawUpdateHighPal(void); @@ -208,9 +248,8 @@ void PicoDrawSetInternalBuf(void *dest, int line_increment); // draw2.c // stuff below is optional -extern unsigned char *PicoDraw2FB; // buffer for fast renderer in format (8+320)x(8+224+8) (eights for borders) extern unsigned short *PicoCramHigh; // pointer to CRAM buff (0x40 shorts), converted to native device color (works only with 16bit for now) -extern void (*PicoPrepareCram)(); // prepares PicoCramHigh for renderer to use +extern void (*PicoPrepareCram)(void);// prepares PicoCramHigh for renderer to use // pico.c (32x) #ifndef NO_32X @@ -229,9 +268,7 @@ void Pico32xSetClocks(int msh2_hz, int ssh2_hz); #define PICO_SSH2_HZ ((int)(7670442.0 * 2.4)) // sound.c -extern int PsndRate,PsndLen; -extern short *PsndOut; -extern void (*PsndMix_32_to_16l)(short *dest, 
int *src, int count); +extern void (*PsndMix_32_to_16)(s16 *dest, s32 *src, int count); void PsndRerate(int preserve_state); // media.c @@ -242,9 +279,42 @@ enum media_type_e { PM_BAD_CD_NO_BIOS = -4, PM_MD_CART = 1, /* also 32x */ PM_MARK3, + PM_PICO, PM_CD, }; + +enum cd_track_type +{ + CT_UNKNOWN = 0, + // data tracks + CT_ISO = 1, /* 2048 B/sector */ + CT_BIN = 2, /* 2352 B/sector */ + // audio tracks + CT_AUDIO = 8, + CT_RAW = CT_AUDIO | 1, + CT_CHD = CT_AUDIO | 2, + CT_MP3 = CT_AUDIO | 3, + CT_WAV = CT_AUDIO | 4, +}; + +typedef struct +{ + char *fname; + int pregap; /* pregap for current track */ + int sector_offset; /* in current file */ + int sector_xlength; + enum cd_track_type type; +} cd_track_t; + +typedef struct +{ + int track_count; + cd_track_t tracks[0]; +} cd_data_t; + + enum media_type_e PicoLoadMedia(const char *filename, + const unsigned char *rom, unsigned int romsize, const char *carthw_cfg_fname, const char *(*get_bios_filename)(int *region, const char *cd_fname), void (*do_region_override)(const char *media_filename)); @@ -257,6 +327,8 @@ enum input_device { PICO_INPUT_NOTHING, PICO_INPUT_PAD_3BTN, PICO_INPUT_PAD_6BTN, + PICO_INPUT_PAD_TEAM, + PICO_INPUT_PAD_4WAY, }; void PicoSetInputDevice(int port, enum input_device device); diff --git a/pico/pico/memory.c b/pico/pico/memory.c index 7c3a6463..f294b037 100644 --- a/pico/pico/memory.c +++ b/pico/pico/memory.c @@ -1,13 +1,13 @@ /* * PicoDrive * (C) notaz, 2008 + * (C) irixxxx, 2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
*/ #include "../pico_int.h" #include "../memory.h" -#include "../sound/sn76496.h" /* void dump(u16 w) @@ -25,52 +25,46 @@ void dump(u16 w) } */ +static u32 PicoRead16_pico(u32 a) +{ + u32 d = 0; + + switch (a & 0x1e) + { + case 0x00: d = PicoPicohw.r1; break; + case 0x02: d = PicoIn.pad[0]&0x1f; // d-pad + d |= (PicoIn.pad[0]&0x20) << 2; // pen push -> C + d = ~d; + break; + case 0x04: d = (PicoPicohw.pen_pos[0] >> 8); break; // what is MS bit for? Games read it.. + case 0x06: d = PicoPicohw.pen_pos[0] & 0xff; break; + case 0x08: d = (PicoPicohw.pen_pos[1] >> 8); break; + case 0x0a: d = PicoPicohw.pen_pos[1] & 0xff; break; + case 0x0c: d = (1 << (PicoPicohw.page & 7)) - 1; break; + case 0x10: d = (PicoPicohw.fifo_bytes > 0x3f) ? 0 : (0x3f - PicoPicohw.fifo_bytes); break; + case 0x12: d = (PicoPicohw.fifo_bytes | !PicoPicoPCMBusyN()) ? 0 : 0x8000; + d |= PicoPicohw.r12 & 0x7fff; + break; + default: elprintf(EL_UIO, "m68k unmapped r16 [%06x] @%06x", a, SekPc); break; + } + return d; +} + static u32 PicoRead8_pico(u32 a) { u32 d = 0; if ((a & 0xffffe0) == 0x800000) // Pico I/O { - switch (a & 0x1f) - { - case 0x01: d = PicoPicohw.r1; break; - case 0x03: - d = PicoPad[0]&0x1f; // d-pad - d |= (PicoPad[0]&0x20) << 2; // pen push -> C - d = ~d; - break; - - case 0x05: d = (PicoPicohw.pen_pos[0] >> 8); break; // what is MS bit for? Games read it.. - case 0x07: d = PicoPicohw.pen_pos[0] & 0xff; break; - case 0x09: d = (PicoPicohw.pen_pos[1] >> 8); break; - case 0x0b: d = PicoPicohw.pen_pos[1] & 0xff; break; - case 0x0d: d = (1 << (PicoPicohw.page & 7)) - 1; break; - case 0x12: d = PicoPicohw.fifo_bytes == 0 ? 0x80 : 0; break; // guess - default: - goto unhandled; - } - return d; + d = PicoRead16_pico(a); + if (!(a & 1)) d >>= 8; + return d & 0xff; } -unhandled: elprintf(EL_UIO, "m68k unmapped r8 [%06x] @%06x", a, SekPc); return d; } -static u32 PicoRead16_pico(u32 a) -{ - u32 d = 0; - - if (a == 0x800010) - d = (PicoPicohw.fifo_bytes > 0x3f) ? 
0 : (0x3f - PicoPicohw.fifo_bytes); - else if (a == 0x800012) - d = PicoPicohw.fifo_bytes == 0 ? 0x8000 : 0; // guess - else - elprintf(EL_UIO, "m68k unmapped r16 [%06x] @%06x", a, SekPc); - - return d; -} - static void PicoWrite8_pico(u32 a, u32 d) { switch (a & ~0x800000) { @@ -98,10 +92,20 @@ static void PicoWrite16_pico(u32 a, u32 d) } } else if (a == 0x800012) { - int r12_old = PicoPicohw.r12; PicoPicohw.r12 = d; - if (r12_old != d) - PicoReratePico(); + + PicoPicoPCMGain(8 - (d & 0x0007)); // volume + PicoPicoPCMFilter((d & 0x00c0) >> 6); // low pass filter + PicoPicoPCMIrqEn(d & 0x4000); // PCM IRQ enable + + if (d & 0x8000) { // PCM reset if 1 is written (dalmatians)? + PsndDoPCM(cycles_68k_to_z80(SekCyclesDone() - Pico.t.m68c_frame_start)); + PicoPicoPCMResetN(0); + PicoPicohw.xpcm_ptr = PicoPicohw.xpcm_buffer; + PicoPicohw.fifo_bytes = 0; + PicoPicoPCMResetN(1); + } + // other bits used in software: 0x3f00. } else elprintf(EL_UIO, "m68k unmapped w16 [%06x] %04x @%06x", a, d & 0xffff, SekPc); diff --git a/pico/pico/pico.c b/pico/pico/pico.c index bff5a2a4..94c80f20 100644 --- a/pico/pico/pico.c +++ b/pico/pico/pico.c @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2008 + * (C) irixxxx, 2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -12,72 +13,43 @@ // 0x2f8 - 0x3f3 picohw_state PicoPicohw; -static int prev_line_cnt_irq3 = 0, prev_line_cnt_irq5 = 0; -static int fifo_bytes_line = (16000<<16)/60/262/2; -static const int guessed_rates[] = { 8000, 14000, 12000, 14000, 16000, 18000, 16000, 16000 }; // ? 
- -#define PICOHW_FIFO_IRQ_THRESHOLD 12 PICO_INTERNAL void PicoReratePico(void) { - int rate = guessed_rates[PicoPicohw.r12 & 7]; - if (Pico.m.pal) - fifo_bytes_line = (rate<<16)/50/312/2; - else fifo_bytes_line = (rate<<16)/60/262/2; - PicoPicoPCMRerate(rate); + PicoPicoPCMRerate(); + PicoPicohw.xpcm_ptr = PicoPicohw.xpcm_buffer + PicoPicohw.fifo_bytes; } static void PicoLinePico(void) { - PicoPicohw.line_counter++; - -#if 1 - if ((PicoPicohw.r12 & 0x4003) && PicoPicohw.line_counter - prev_line_cnt_irq3 > 200) { - prev_line_cnt_irq3 = PicoPicohw.line_counter; - // just a guess/hack, allows 101 Dalmantians to boot - elprintf(EL_PICOHW, "irq3"); - SekInterrupt(3); - return; - } -#endif - - if (PicoPicohw.fifo_bytes > 0) - { - PicoPicohw.fifo_line_bytes += fifo_bytes_line; - if (PicoPicohw.fifo_line_bytes >= (1<<16)) { - PicoPicohw.fifo_bytes -= PicoPicohw.fifo_line_bytes >> 16; - PicoPicohw.fifo_line_bytes &= 0xffff; - if (PicoPicohw.fifo_bytes < 0) - PicoPicohw.fifo_bytes = 0; - } - } - else - PicoPicohw.fifo_line_bytes = 0; - -#if 1 - if (PicoPicohw.fifo_bytes_prev >= PICOHW_FIFO_IRQ_THRESHOLD && - PicoPicohw.fifo_bytes < PICOHW_FIFO_IRQ_THRESHOLD) { - prev_line_cnt_irq3 = PicoPicohw.line_counter; // ? 
- elprintf(EL_PICOHW, "irq3, fb=%i", PicoPicohw.fifo_bytes); - SekInterrupt(3); - } - PicoPicohw.fifo_bytes_prev = PicoPicohw.fifo_bytes; -#endif - -#if 0 - if (PicoPicohw.line_counter - prev_line_cnt_irq5 > 512) { - prev_line_cnt_irq5 = PicoPicohw.line_counter; - elprintf(EL_PICOHW, "irq5"); - SekInterrupt(5); - } -#endif + // update sound so that irq for FIFO refill is generated + if ((PicoPicohw.fifo_bytes | !PicoPicoPCMBusyN()) && (Pico.m.scanline & 7) == 7) + PsndDoPCM(cycles_68k_to_z80(SekCyclesDone() - Pico.t.m68c_frame_start)); } static void PicoResetPico(void) { - PicoPicoPCMReset(); + PicoPicoPCMResetN(1); + PicoPicoPCMStartN(1); PicoPicohw.xpcm_ptr = PicoPicohw.xpcm_buffer; + PicoPicohw.fifo_bytes = 0; + PicoPicohw.r12 = 0; + + PicoPicohw.pen_pos[0] = PicoPicohw.pen_pos[1] = 0x8000; + + PicoPicoPCMIrqEn(0); + PicoPicoPCMFilter(0); + PicoPicoPCMGain(8); + + // map version register + PicoDetectRegion(); + switch (Pico.m.hardware >> 6) { + case 0: PicoPicohw.r1 = 0x40; break; // JP NTSC + case 1: PicoPicohw.r1 = 0x00; break; // JP PAL + case 2: PicoPicohw.r1 = 0x60; break; // US + case 3: PicoPicohw.r1 = 0x20; break; // EU + } } PICO_INTERNAL void PicoInitPico(void) @@ -86,19 +58,7 @@ PICO_INTERNAL void PicoInitPico(void) PicoLineHook = PicoLinePico; PicoResetHook = PicoResetPico; - PicoAHW = PAHW_PICO; + PicoIn.AHW = PAHW_PICO; memset(&PicoPicohw, 0, sizeof(PicoPicohw)); - PicoPicohw.pen_pos[0] = 0x03c + 320/2; - PicoPicohw.pen_pos[1] = 0x200 + 240/2; - prev_line_cnt_irq3 = prev_line_cnt_irq5 = 0; - - // map version register - PicoDetectRegion(); - switch (Pico.m.hardware >> 6) { - case 0: PicoPicohw.r1 = 0x00; break; - case 1: PicoPicohw.r1 = 0x00; break; - case 2: PicoPicohw.r1 = 0x40; break; - case 3: PicoPicohw.r1 = 0x20; break; - } + PicoPicohw.pen_pos[0] = PicoPicohw.pen_pos[1] = 0x8000; } - diff --git a/pico/pico/xpcm.c b/pico/pico/xpcm.c index 0109dcf9..7d69b43e 100644 --- a/pico/pico/xpcm.c +++ b/pico/pico/xpcm.c @@ -1,120 +1,353 @@ /* * PicoDrive 
* (C) notaz, 2008 + * (C) irixxxx, 2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. * - * The following ADPCM algorithm was stolen from MAME aica driver. - * I'm quite sure it's not the right one, but it's the - * best sounding of the ones that I tried. + * The following ADPCM algorithm was derived from MAME upd7759 driver. + * + * The Pico is using this chip in slave mode. In this mode there are no ROM + * headers, but the first byte sent to the chip is used to start the ADPCM + * engine. This byte is discarded, i.e. not processed by the engine. + * + * Data is fed into the chip through a FIFO. An Interrupt is created if the + * FIFO has been drained below the low water mark. + * + * The Pico has 2 extensions to the standard upd7759 chip: + * - gain control, used to control the volume of the ADPCM output + * - filtering, used to remove (some of) the ADPCM compression artifacts */ +#include #include "../pico_int.h" -#define ADPCMSHIFT 8 -#define ADFIX(f) (int) ((double)f * (double)(1< max ) val = max; \ - else if ( val < min ) val = min; \ -} +#define Limit(val, max, min) \ + (val > max ? max : val < min ? 
min : val) -static const int TableQuant[8] = +#define ADPCM_CLOCK (1280000/4) + +#define FIFO_IRQ_THRESHOLD 16 + +static const int step_deltas[16][16] = { - ADFIX(0.8984375), - ADFIX(0.8984375), - ADFIX(0.8984375), - ADFIX(0.8984375), - ADFIX(1.19921875), - ADFIX(1.59765625), - ADFIX(2.0), - ADFIX(2.3984375) + { 0, 0, 1, 2, 3, 5, 7, 10, 0, 0, -1, -2, -3, -5, -7, -10 }, + { 0, 1, 2, 3, 4, 6, 8, 13, 0, -1, -2, -3, -4, -6, -8, -13 }, + { 0, 1, 2, 4, 5, 7, 10, 15, 0, -1, -2, -4, -5, -7, -10, -15 }, + { 0, 1, 3, 4, 6, 9, 13, 19, 0, -1, -3, -4, -6, -9, -13, -19 }, + { 0, 2, 3, 5, 8, 11, 15, 23, 0, -2, -3, -5, -8, -11, -15, -23 }, + { 0, 2, 4, 7, 10, 14, 19, 29, 0, -2, -4, -7, -10, -14, -19, -29 }, + { 0, 3, 5, 8, 12, 16, 22, 33, 0, -3, -5, -8, -12, -16, -22, -33 }, + { 1, 4, 7, 10, 15, 20, 29, 43, -1, -4, -7, -10, -15, -20, -29, -43 }, + { 1, 4, 8, 13, 18, 25, 35, 53, -1, -4, -8, -13, -18, -25, -35, -53 }, + { 1, 6, 10, 16, 22, 31, 43, 64, -1, -6, -10, -16, -22, -31, -43, -64 }, + { 2, 7, 12, 19, 27, 37, 51, 76, -2, -7, -12, -19, -27, -37, -51, -76 }, + { 2, 9, 16, 24, 34, 46, 64, 96, -2, -9, -16, -24, -34, -46, -64, -96 }, + { 3, 11, 19, 29, 41, 57, 79, 117, -3, -11, -19, -29, -41, -57, -79, -117 }, + { 4, 13, 24, 36, 50, 69, 96, 143, -4, -13, -24, -36, -50, -69, -96, -143 }, + { 4, 16, 29, 44, 62, 85, 118, 175, -4, -16, -29, -44, -62, -85, -118, -175 }, + { 6, 20, 36, 54, 76, 104, 144, 214, -6, -20, -36, -54, -76, -104, -144, -214 }, }; -// changed using trial and error.. 
-//static const int quant_mul[16] = { 1, 3, 5, 7, 9, 11, 13, 15, -1, -3, -5, -7, -9, -11, -13, -15 }; -static const int quant_mul[16] = { 1, 3, 5, 7, 9, 11, 13, -1, -1, -3, -5, -7, -9, -11, -13, -15 }; +static const int state_deltas[16] = { -1, -1, 0, 0, 1, 2, 2, 3, -1, -1, 0, 0, 1, 2, 2, 3 }; -static int sample = 0, quant = 0, sgn = 0; -static int stepsamples = (44100<<10)/16000; +static s32 stepsamples; // ratio as Q16, host sound rate / chip sample rate + +static struct xpcm_state { + s32 samplepos; // leftover duration for current sample wrt sndrate, Q16 + int sample; // current sample + short state; // ADPCM decoder state + short samplegain; // programmable gain + + char startpin; // value on the !START pin + char irqenable; // IRQ enabled? + + char portstate; // ADPCM stream state + short silence; // silence blocks still to be played + short rate, nibbles; // ADPCM nibbles still to be played + unsigned char highlow, cache; // nibble selector and cache + + char filter; // filter selector + s32 x[3], y[3]; // filter history +} xpcm; +enum { RESET, START, HDR, COUNT }; // portstate -PICO_INTERNAL void PicoPicoPCMReset(void) +// SEGA Pico specific filtering + +#define QB 16 // mantissa bits +#define FP(f) (int)((f)*(1<= rate/2) { + memset(iir, 0, sizeof(*iir)); + return; + } + + // compute 2nd order butterworth filter coefficients + double a = 1 / tan(M_PI * cutoff / rate); + double axa = a*a; + double gain = 1/(1 + M_SQRT2*a + axa); + iir->gain = FP(gain); + iir->a[0] = FP(2 * (axa-1) * gain); + iir->a[1] = FP(-(1 - M_SQRT2*a + axa) * gain); } -PICO_INTERNAL void PicoPicoPCMRerate(int xpcm_rate) +static int PicoPicoFilterApply(struct iir2 *iir, int sample) { - stepsamples = (PsndRate<<10)/xpcm_rate; + if (!iir) + return sample; + + // NB Butterworth specific! 
+ xpcm.x[0] = xpcm.x[1]; xpcm.x[1] = xpcm.x[2]; + xpcm.x[2] = sample * iir->gain; // Qb + xpcm.y[0] = xpcm.y[1]; xpcm.y[1] = xpcm.y[2]; + xpcm.y[2] = (xpcm.x[0] + 2*xpcm.x[1] + xpcm.x[2] + + xpcm.y[0]*iir->a[1] + xpcm.y[1]*iir->a[0]) >> QB; + return xpcm.y[2]; } -#define XSHIFT 6 -#define do_sample() \ +// pin functions, N designating a negated pin + +PICO_INTERNAL void PicoPicoPCMResetN(int pin) +{ + if (!pin) { + xpcm.portstate = RESET; + xpcm.sample = xpcm.samplepos = xpcm.state = 0; + xpcm.nibbles = xpcm.silence = 0; + } else if (xpcm.portstate == RESET) + xpcm.portstate = START; +} + +PICO_INTERNAL void PicoPicoPCMStartN(int pin) +{ + xpcm.startpin = pin; +} + +PICO_INTERNAL int PicoPicoPCMBusyN(void) +{ + return (xpcm.portstate <= START); +} + + +// configuration functions + +PICO_INTERNAL void PicoPicoPCMRerate(void) +{ + s32 nextstep = ((u64)PicoIn.sndRate<<16)/ADPCM_CLOCK; + + // if the sound rate changes, erase filter history to avoid freak behaviour + if (stepsamples != nextstep) { + memset(xpcm.x, 0, sizeof(xpcm.x)); + memset(xpcm.y, 0, sizeof(xpcm.y)); + } + + // output samples per chip clock + stepsamples = nextstep; + + // compute filter coefficients, cutoff at half the ADPCM sample rate + PicoPicoFilterCoeff(&filters[1], 6000/2, PicoIn.sndRate); // 5-6 KHz + PicoPicoFilterCoeff(&filters[2], 9000/2, PicoIn.sndRate); // 8-12 KHz + PicoPicoFilterCoeff(&filters[3], 15000/2, PicoIn.sndRate); // 14-16 KHz + + PicoPicoPCMFilter(xpcm.filter); +} + +PICO_INTERNAL void PicoPicoPCMGain(int gain) +{ + xpcm.samplegain = gain*4; +} + +PICO_INTERNAL void PicoPicoPCMFilter(int index) +{ + // if the filter changes, erase the history to avoid freak behaviour + if (index != xpcm.filter) { + memset(xpcm.x, 0, sizeof(xpcm.x)); + memset(xpcm.y, 0, sizeof(xpcm.y)); + } + + xpcm.filter = index; + filter = filters+index; + if (filter->a[0] == 0) + filter = NULL; +} + +PICO_INTERNAL void PicoPicoPCMIrqEn(int enable) +{ + xpcm.irqenable = (enable ? 
3 : 0); +} + +// TODO need an interupt pending mask? +PICO_INTERNAL int PicoPicoIrqAck(int level) +{ + return (PicoPicohw.fifo_bytes < FIFO_IRQ_THRESHOLD && level != xpcm.irqenable + ? xpcm.irqenable : 0); +} + + +// adpcm operation + +#define apply_filter(v) PicoPicoFilterApply(filter, v) + +// compute next ADPCM sample +#define do_sample(nibble) \ { \ - int delta = quant * quant_mul[srcval] >> XSHIFT; \ - sample += delta - (delta >> 2); /* 3/4 */ \ - quant = (quant * TableQuant[srcval&7]) >> ADPCMSHIFT; \ - Limit(quant, 0x6000, 0x7f); \ - Limit(sample, 32767*3/4, -32768*3/4); \ + xpcm.sample += step_deltas[xpcm.state][nibble]; \ + xpcm.state += state_deltas[nibble]; \ + xpcm.state = (xpcm.state < 0 ? 0 : xpcm.state > 15 ? 15 : xpcm.state); \ +} + +// writes samples with sndRate, nearest neighbour resampling, filtering +#define write_sample(buffer, length, stereo) \ +{ \ + while (xpcm.samplepos > 0 && length > 0) { \ + int val = Limit(xpcm.samplegain*xpcm.sample, 16383, -16384); \ + xpcm.samplepos -= 1<<16; \ + length --; \ + if (buffer) { \ + int out = apply_filter(val); \ + *buffer++ += out; \ + if (stereo) *buffer++ += out; \ + } \ + } \ } PICO_INTERNAL void PicoPicoPCMUpdate(short *buffer, int length, int stereo) { unsigned char *src = PicoPicohw.xpcm_buffer; unsigned char *lim = PicoPicohw.xpcm_ptr; - int srcval, needsamples = 0; + int srcval, irq = 0; - if (src == lim) goto end; + // leftover partial sample from last run + write_sample(buffer, length, stereo); - for (; length > 0 && src < lim; src++) + // loop over FIFO data, generating ADPCM samples + while (length > 0 && src < lim) { - srcval = *src >> 4; - do_sample(); + // ADPCM state engine + if (xpcm.silence > 0) { // generate silence + xpcm.silence --; + xpcm.sample = 0; + xpcm.samplepos += stepsamples*256; - for (needsamples += stepsamples; needsamples > (1<<10) && length > 0; needsamples -= (1<<10), length--) { - *buffer++ += sample; - if (stereo) { buffer[0] = buffer[-1]; buffer++; } - } + } else 
if (xpcm.nibbles > 0) { // produce samples + xpcm.nibbles --; - srcval = *src & 0xf; - do_sample(); + if (xpcm.highlow) + xpcm.cache = *src++; + else + xpcm.cache <<= 4; + xpcm.highlow = !xpcm.highlow; - for (needsamples += stepsamples; needsamples > (1<<10) && length > 0; needsamples -= (1<<10), length--) { - *buffer++ += sample; - if (stereo) { buffer[0] = buffer[-1]; buffer++; } - } + do_sample((xpcm.cache & 0xf0) >> 4); + xpcm.samplepos += stepsamples*xpcm.rate; - // lame normalization stuff, needed due to wrong adpcm algo - sgn += (sample < 0) ? -1 : 1; - if (sgn < -16 || sgn > 16) sample -= sample >> 5; + } else switch (xpcm.portstate) { // handle stream headers + case RESET: + xpcm.sample = 0; + xpcm.samplepos += length<<16; + break; + case START: + if (xpcm.startpin) { + if (*src) + xpcm.portstate ++; + else // kill 0x00 bytes at stream start + src ++; + } else { + xpcm.sample = 0; + xpcm.samplepos += length<<16; + } + break; + case HDR: + srcval = *src++; + xpcm.nibbles = xpcm.silence = xpcm.rate = 0; + xpcm.highlow = 1; + if (srcval == 0) { // terminator + // HACK, kill leftover odd byte to avoid restart (Minna de Odorou) + if (lim-src == 1) src++; + xpcm.portstate = START; + } else switch (srcval >> 6) { + case 0: xpcm.silence = (srcval & 0x3f) + 1; break; + case 1: xpcm.rate = (srcval & 0x3f) + 1; xpcm.nibbles = 256; break; + case 2: xpcm.rate = (srcval & 0x3f) + 1; xpcm.portstate = COUNT; break; + case 3: break; + } + break; + case COUNT: + xpcm.nibbles = *src++ + 1; xpcm.portstate = HDR; + break; + } + + write_sample(buffer, length, stereo); } - if (src < lim) { + // buffer cleanup, generate irq if lowwater reached + if (src < lim && src != PicoPicohw.xpcm_buffer) { int di = lim - src; memmove(PicoPicohw.xpcm_buffer, src, di); PicoPicohw.xpcm_ptr = PicoPicohw.xpcm_buffer + di; elprintf(EL_PICOHW, "xpcm update: over %i", di); - // adjust fifo + + if (!irq && di < FIFO_IRQ_THRESHOLD) + irq = xpcm.irqenable; PicoPicohw.fifo_bytes = di; - return; + } else 
if (src == lim && src != PicoPicohw.xpcm_buffer) { + PicoPicohw.xpcm_ptr = PicoPicohw.xpcm_buffer; + elprintf(EL_PICOHW, "xpcm update: under %i", length); + + if (!irq) + irq = xpcm.irqenable; + PicoPicohw.fifo_bytes = 0; } - elprintf(EL_PICOHW, "xpcm update: under %i", length); - PicoPicohw.xpcm_ptr = PicoPicohw.xpcm_buffer; + // TODO need an IRQ mask somewhere to avoid loosing one in cases of HINT/VINT + if (irq && SekIrqLevel != irq) { + elprintf(EL_PICOHW, "irq%d", irq); + if (SekIrqLevel < irq) + SekInterrupt(irq); + } -end: - if (stereo) - // still must expand SN76496 to stereo - for (; length > 0; buffer+=2, length--) - buffer[1] = buffer[0]; - - sample = sgn = 0; - quant = 0x7f; + if (buffer && length) { + // for underflow, use last sample to avoid clicks + int val = Limit(xpcm.samplegain*xpcm.sample, 16383, -16384); + while (length--) { + int out = apply_filter(val); + *buffer++ += out; + if (stereo) *buffer++ += out; + } + } } +PICO_INTERNAL int PicoPicoPCMSave(void *buffer, int length) +{ + u8 *bp = buffer; + + if (length < sizeof(xpcm)) { + elprintf(EL_ANOMALY, "save buffer too small?"); + return 0; + } + + memcpy(bp, &xpcm, sizeof(xpcm)); + bp += sizeof(xpcm); + return (bp - (u8*)buffer); +} + +PICO_INTERNAL void PicoPicoPCMLoad(void *buffer, int length) +{ + u8 *bp = buffer; + + if (length >= sizeof(xpcm)) + memcpy(&xpcm, bp, sizeof(xpcm)); + bp += sizeof(xpcm); +} diff --git a/pico/pico_cmn.c b/pico/pico_cmn.c index 341255a8..e9b79952 100644 --- a/pico/pico_cmn.c +++ b/pico/pico_cmn.c @@ -1,14 +1,14 @@ /* * common code for base/cd/32x * (C) notaz, 2007-2009,2013 + * (C) irixxxx, 2020-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
*/ #define CYCLES_M68K_LINE 488 // suitable for both PAL/NTSC -#define CYCLES_M68K_VINT_LAG 68 -#define CYCLES_M68K_ASD 148 +#define CYCLES_M68K_VINT_LAG 112 // pad delay (for 6 button pads) #define PAD_DELAY() { \ @@ -22,264 +22,327 @@ SekRunM68k(m68k_cycles) #endif -// sync m68k to SekCycleAim -static void SekSyncM68k(void) +// sync m68k to Pico.t.m68c_aim +static void SekExecM68k(int cyc_do) +{ + Pico.t.m68c_cnt += cyc_do; + +#if defined(EMU_C68K) + PicoCpuCM68k.cycles = cyc_do; + CycloneRun(&PicoCpuCM68k); + Pico.t.m68c_cnt -= PicoCpuCM68k.cycles; +#elif defined(EMU_M68K) + Pico.t.m68c_cnt += m68k_execute(cyc_do) - cyc_do; +#elif defined(EMU_F68K) + Pico.t.m68c_cnt += fm68k_emulate(&PicoCpuFM68k, cyc_do, 0) - cyc_do; +#endif + SekCyclesLeft = 0; +} + +static int SekSyncM68k(int once) { int cyc_do; + pprof_start(m68k); pevt_log_m68k_o(EVT_RUN_START); - while ((cyc_do = SekCycleAim - SekCycleCnt) > 0) { - SekCycleCnt += cyc_do; - -#if defined(EMU_C68K) - PicoCpuCM68k.cycles = cyc_do; - CycloneRun(&PicoCpuCM68k); - SekCycleCnt -= PicoCpuCM68k.cycles; -#elif defined(EMU_M68K) - SekCycleCnt += m68k_execute(cyc_do) - cyc_do; -#elif defined(EMU_F68K) - SekCycleCnt += fm68k_emulate(cyc_do, 0) - cyc_do; -#endif + while ((cyc_do = Pico.t.m68c_aim - Pico.t.m68c_cnt) > 0) { + // the Z80 CPU is stealing some bus cycles from the 68K main CPU when + // accessing the main bus. Account for these by shortening the time + // the 68K CPU runs. 
+ int z80_buscyc = Pico.t.z80_buscycles >> (~Pico.m.scanline & 1); + if (z80_buscyc <= cyc_do) + SekExecM68k(cyc_do - z80_buscyc); + else + z80_buscyc = cyc_do; + Pico.t.m68c_cnt += z80_buscyc; + Pico.t.z80_buscycles -= z80_buscyc; + if (once) break; } - SekCyclesLeft = 0; - SekTrace(0); pevt_log_m68k_o(EVT_RUN_END); pprof_end(m68k); + + return Pico.t.m68c_aim > Pico.t.m68c_cnt; } -static inline void SekRunM68k(int cyc) +static __inline void SekAimM68k(int cyc, int mult) { - SekCycleAim += cyc; - SekSyncM68k(); + // refresh slowdown, for cart: 2 cycles every 128 - make this 1 every 64, + // for RAM: seems to be 0-3 every 128. Carts usually run from the cart + // area, but MCD games only use RAM, hence a different multiplier is needed. + // NB must be quite accurate, so handle fractions as well (c/f OutRunners) + int delay = (Pico.t.refresh_delay += cyc*mult) >> 14; + Pico.t.m68c_cnt += delay; + Pico.t.refresh_delay -= delay << 14; + Pico.t.m68c_aim += cyc; +} + +static __inline void SekRunM68k(int cyc) +{ + // TODO 0x100 would be 2 cycles/128, moreover far too sensitive + SekAimM68k(cyc, 0x108); // OutRunners, testpico, VDPFIFOTesting + SekSyncM68k(0); +} + +static void SyncCPUs(unsigned int cycles) +{ + // sync cpus + if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) + PicoSyncZ80(cycles); + +#ifdef PICO_CD + if (PicoIn.AHW & PAHW_MCD) + pcd_sync_s68k(cycles, 0); +#endif +#ifdef PICO_32X + p32x_sync_sh2s(cycles); +#endif +} + +static void do_hint(struct PicoVideo *pv) +{ + pv->pending_ints |= 0x10; + if (pv->reg[0] & 0x10) { + elprintf(EL_INTS, "hint: @ %06x [%u]", SekPc, SekCyclesDone()); + if (SekIrqLevel < pv->hint_irq) + SekInterrupt(pv->hint_irq); + } +} + +static void do_timing_hacks_end(struct PicoVideo *pv) +{ + PicoVideoFIFOSync(CYCLES_M68K_LINE); + + // need rather tight Z80 sync for emulation of main bus cycle stealing + if (Pico.m.scanline&1) + if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) + 
PicoSyncZ80(Pico.t.m68c_aim); +} + +static void do_timing_hacks_start(struct PicoVideo *pv) +{ + int cycles = PicoVideoFIFOHint(); + + SekCyclesBurn(cycles); // prolong cpu HOLD if necessary + // XXX how to handle Z80 bus cycle stealing during DMA correctly? + if ((Pico.t.z80_buscycles -= cycles) < 0) + Pico.t.z80_buscycles = 0; + Pico.t.m68c_aim += Pico.m.scanline&1; // add 1 every 2 lines for 488.5 cycles } static int PicoFrameHints(void) { - struct PicoVideo *pv=&Pico.video; - int lines, y, lines_vis = 224, line_sample, skip, vcnt_wrap; - unsigned int cycles; + struct PicoVideo *pv = &Pico.video; + int lines, y, lines_vis, skip; int hint; // Hint counter pevt_log_m68k_o(EVT_FRAME_START); - pv->v_counter = Pico.m.scanline = 0; - if ((PicoOpt&POPT_ALT_RENDERER) && !PicoSkipFrame && (pv->reg[1]&0x40)) { // fast rend., display enabled - // draw a frame just after vblank in alternative render mode - // yes, this will cause 1 frame lag, but this is inaccurate mode anyway. - PicoFrameFull(); -#ifdef DRAW_FINISH_FUNC - DRAW_FINISH_FUNC(); -#endif - skip = 1; - } - else skip=PicoSkipFrame; + skip = PicoIn.skipFrame; - if (Pico.m.pal) { - line_sample = 68; - if (pv->reg[1]&8) lines_vis = 240; - } else { - line_sample = 93; - } + Pico.t.m68c_frame_start = Pico.t.m68c_aim; + PsndStartFrame(); - z80_resetCycles(); - PsndDacLine = 0; - emustatus &= ~1; + hint = pv->hint_cnt; - pv->status&=~0x88; // clear V-Int, come out of vblank + // === active display === + pv->status |= PVS_ACTIVE; - hint=pv->reg[10]; // Load H-Int counter - //dprintf("-hint: %i", hint); - - // This is to make active scan longer (needed for Double Dragon 2, mainly) - CPUS_RUN(CYCLES_M68K_ASD); - - for (y = 0; y < lines_vis; y++) + for (y = 0; y < 240; y++) { - pv->v_counter = Pico.m.scanline = y; - if ((pv->reg[12]&6) == 6) { // interlace mode 2 - pv->v_counter <<= 1; - pv->v_counter |= pv->v_counter >> 8; - pv->v_counter &= 0xff; - } + if (y == 224 && !(pv->reg[1] & 8)) + break; - // VDP FIFO - 
pv->lwrite_cnt -= 12; - if (pv->lwrite_cnt <= 0) { - pv->lwrite_cnt=0; - Pico.video.status|=0x200; - } + Pico.m.scanline = y; + pv->v_counter = PicoVideoGetV(y, 0); PAD_DELAY(); // H-Interrupts: - if (--hint < 0) // y <= lines_vis: Comix Zone, Golden Axe + if (--hint < 0) { - hint=pv->reg[10]; // Reload H-Int counter - pv->pending_ints|=0x10; - if (pv->reg[0]&0x10) { - elprintf(EL_INTS, "hint: @ %06x [%i]", SekPc, SekCyclesDone()); - SekInterrupt(4); - } + hint = pv->reg[10]; // Reload H-Int counter + do_hint(pv); } // decide if we draw this line - if (!skip && (PicoOpt & POPT_ALT_RENDERER)) + if (unlikely(PicoIn.opt & POPT_ALT_RENDERER) && !skip) { // find the right moment for frame renderer, when display is no longer blanked if ((pv->reg[1]&0x40) || y > 100) { - PicoFrameFull(); + if (Pico.est.rendstatus & PDRAW_SYNC_NEEDED) + PicoFrameFull(); #ifdef DRAW_FINISH_FUNC DRAW_FINISH_FUNC(); #endif + Pico.est.rendstatus &= ~PDRAW_SYNC_NEEDED; skip = 1; } } - // get samples from sound chips - if ((y == 224 || y == line_sample) && PsndOut) - { - cycles = SekCyclesDone(); - - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) - PicoSyncZ80(cycles); - if (ym2612.dacen && PsndDacLine <= y) - PsndDoDAC(y); -#ifdef PICO_CD - if (PicoAHW & PAHW_MCD) - pcd_sync_s68k(cycles, 0); -#endif -#ifdef PICO_32X - p32x_sync_sh2s(cycles); -#endif - PsndGetSamples(y); - } - // Run scanline: - line_base_cycles = SekCyclesDone(); - if (Pico.m.dma_xfers) SekCyclesBurn(CheckDMA()); + Pico.t.m68c_line_start = Pico.t.m68c_aim; + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_LINE); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); } + SyncCPUs(Pico.t.m68c_aim); + if (!skip) { - if (DrawScanline < y) - PicoDrawSync(y - 1, 0); + if (Pico.est.DrawScanline < y) + PicoVideoSync(-1); #ifdef DRAW_FINISH_FUNC DRAW_FINISH_FUNC(); #endif + Pico.est.rendstatus &= ~PDRAW_SYNC_NEEDED; } +#ifdef PICO_32X + p32x_render_frame(); +#endif - // V-int line 
(224 or 240) + // === VBLANK, 1st line === + lines_vis = (pv->reg[1] & 8) ? 240 : 224; + if (y == lines_vis) + pv->status &= ~PVS_ACTIVE; Pico.m.scanline = y; - pv->v_counter = 0xe0; // bad for 240 mode - if ((pv->reg[12]&6) == 6) pv->v_counter = 0xc1; + pv->v_counter = PicoVideoGetV(y, 0); - // VDP FIFO - pv->lwrite_cnt=0; - Pico.video.status|=0x200; - - memcpy(PicoPadInt, PicoPad, sizeof(PicoPadInt)); + memcpy(PicoIn.padInt, PicoIn.pad, sizeof(PicoIn.padInt)); PAD_DELAY(); - // Last H-Int: + // Last H-Int (normally): if (--hint < 0) { - hint=pv->reg[10]; // Reload H-Int counter - pv->pending_ints|=0x10; - //printf("rhint: %i @ %06x [%i|%i]\n", hint, SekPc, y, SekCyclesDone()); - if (pv->reg[0]&0x10) SekInterrupt(4); + hint = pv->reg[10]; // Reload H-Int counter + do_hint(pv); } - pv->status|=0x08; // go into vblank - pv->pending_ints|=0x20; + pv->status |= SR_VB | PVS_VB2; // go into vblank +#ifdef PICO_32X + p32x_start_blank(); +#endif // the following SekRun is there for several reasons: // there must be a delay after vblank bit is set and irq is asserted (Mazin Saga) // also delay between F bit (bit 7) is set in SR and IRQ happens (Ex-Mutants) // also delay between last H-int and V-int (Golden Axe 3) - line_base_cycles = SekCyclesDone(); - if (Pico.m.dma_xfers) SekCyclesBurn(CheckDMA()); + Pico.t.m68c_line_start = Pico.t.m68c_aim; + PicoVideoFIFOMode(pv->reg[1]&0x40, pv->reg[12]&1); + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_VINT_LAG); - if (pv->reg[1]&0x20) { - elprintf(EL_INTS, "vint: @ %06x [%i]", SekPc, SekCyclesDone()); + SyncCPUs(Pico.t.m68c_aim); + + pv->status |= SR_F; + pv->pending_ints |= 0x20; + + if (pv->reg[1] & 0x20) { + if (Pico.t.m68c_cnt - Pico.t.m68c_aim < 60) // CPU blocked? 
+ SekExecM68k(11); // HACK + elprintf(EL_INTS, "vint: @ %06x [%u]", SekPc, SekCyclesDone()); SekInterrupt(6); } - cycles = SekCyclesDone(); - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) { - PicoSyncZ80(cycles); + // assert Z80 interrupt for one scanline even in busrq hold (Teddy Blues) + if (/*Pico.m.z80Run &&*/ !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) { elprintf(EL_INTS, "zint"); - z80_int(); - } - -#ifdef PICO_CD - if (PicoAHW & PAHW_MCD) - pcd_sync_s68k(cycles, 0); -#endif -#ifdef PICO_32X - p32x_sync_sh2s(cycles); - p32x_start_blank(); -#endif - - // get samples from sound chips - if (y == 224 && PsndOut) - { - if (ym2612.dacen && PsndDacLine <= y) - PsndDoDAC(y); - PsndGetSamples(y); + z80_int_assert(1); } // Run scanline: - CPUS_RUN(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG - CYCLES_M68K_ASD); + CPUS_RUN(CYCLES_M68K_LINE - CYCLES_M68K_VINT_LAG); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); - lines = scanlines_total; - vcnt_wrap = Pico.m.pal ? 0x103 : 0xEB; // based on Gens, TODO: verify + if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoIn.opt&POPT_EN_Z80)) + PicoSyncZ80(Pico.t.m68c_aim); + z80_int_assert(0); - for (y++; y < lines; y++) + // === VBLANK === + lines = Pico.m.pal ? 313 : 262; + for (y++; y < lines - 1; y++) { - pv->v_counter = Pico.m.scanline = y; - if (y >= vcnt_wrap) - pv->v_counter -= Pico.m.pal ? 
56 : 6; - if ((pv->reg[12]&6) == 6) - pv->v_counter = (pv->v_counter << 1) | 1; - pv->v_counter &= 0xff; + Pico.m.scanline = y; + pv->v_counter = PicoVideoGetV(y, 1); PAD_DELAY(); + if (unlikely(pv->status & PVS_ACTIVE) && --hint < 0) + { + hint = pv->reg[10]; // Reload H-Int counter + do_hint(pv); + } + // Run scanline: - line_base_cycles = SekCyclesDone(); - if (Pico.m.dma_xfers) SekCyclesBurn(CheckDMA()); + Pico.t.m68c_line_start = Pico.t.m68c_aim; + do_timing_hacks_start(pv); CPUS_RUN(CYCLES_M68K_LINE); + do_timing_hacks_end(pv); if (PicoLineHook) PicoLineHook(); pevt_log_m68k_o(EVT_NEXT_LINE); } - // sync cpus - cycles = SekCyclesDone(); - if (Pico.m.z80Run && !Pico.m.z80_reset && (PicoOpt&POPT_EN_Z80)) - PicoSyncZ80(cycles); - if (PsndOut && ym2612.dacen && PsndDacLine <= lines-1) - PsndDoDAC(lines-1); + if (unlikely(PicoIn.overclockM68k)) { + unsigned int l = PicoIn.overclockM68k * lines / 100; + while (l-- > 0) { + Pico.t.m68c_cnt -= CYCLES_M68K_LINE; + do_timing_hacks_start(pv); + SekSyncM68k(0); + do_timing_hacks_end(pv); + } + } -#ifdef PICO_CD - if (PicoAHW & PAHW_MCD) - pcd_sync_s68k(cycles, 0); -#endif + // === VBLANK last line === + pv->status &= ~(SR_VB | PVS_VB2); + pv->status |= ((pv->reg[1] >> 3) ^ SR_VB) & SR_VB; // forced blanking + + // last scanline + Pico.m.scanline = y++; + pv->v_counter = 0xff; + + PAD_DELAY(); + + if (unlikely(pv->status & PVS_ACTIVE)) { + if (--hint < 0) { + hint = pv->reg[10]; // Reload H-Int counter + do_hint(pv); + } + } + else + hint = pv->reg[10]; + + // Run scanline: + Pico.t.m68c_line_start = Pico.t.m68c_aim; + PicoVideoFIFOMode(pv->reg[1]&0x40, pv->reg[12]&1); + do_timing_hacks_start(pv); + CPUS_RUN(CYCLES_M68K_LINE); + do_timing_hacks_end(pv); + + if (PicoLineHook) PicoLineHook(); + pevt_log_m68k_o(EVT_NEXT_LINE); + + SyncCPUs(Pico.t.m68c_aim); #ifdef PICO_32X - p32x_sync_sh2s(cycles); + p32x_end_blank(); #endif - timers_cycle(); + + // get samples from sound chips + PsndGetSamples(y); + + 
timers_cycle(cycles_68k_to_z80(Pico.t.m68c_aim - Pico.t.m68c_frame_start)); + z80_resetCycles(); + + pv->hint_cnt = hint; return 0; } diff --git a/pico/pico_int.h b/pico/pico_int.h index 41dc59dc..4caa800d 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -9,10 +9,10 @@ #ifndef PICO_INTERNAL_INCLUDED #define PICO_INTERNAL_INCLUDED - #include -#include #include +#include "pico_types.h" +#include "pico_port.h" #include "pico.h" #include "carthw/carthw.h" @@ -32,10 +32,9 @@ extern "C" { #endif - // ----------------------- 68000 CPU ----------------------- #ifdef EMU_C68K -#include "../cpu/cyclone/Cyclone.h" +#include extern struct Cyclone PicoCpuCM68k, PicoCpuCS68k; #define SekCyclesLeft PicoCpuCM68k.cycles // cycles left for this run #define SekCyclesLeftS68k PicoCpuCS68k.cycles @@ -45,8 +44,8 @@ extern struct Cyclone PicoCpuCM68k, PicoCpuCS68k; #define SekDarS68k(x) (x < 8 ? PicoCpuCS68k.d[x] : PicoCpuCS68k.a[x - 8]) #define SekSr CycloneGetSr(&PicoCpuCM68k) #define SekSrS68k CycloneGetSr(&PicoCpuCS68k) -#define SekSetStop(x) { PicoCpuCM68k.state_flags&=~1; if (x) { PicoCpuCM68k.state_flags|=1; PicoCpuCM68k.cycles=0; } } -#define SekSetStopS68k(x) { PicoCpuCS68k.state_flags&=~1; if (x) { PicoCpuCS68k.state_flags|=1; PicoCpuCS68k.cycles=0; } } +#define SekSetStop(x) { PicoCpuCM68k.state_flags&=~1; if (x) { PicoCpuCM68k.state_flags|=1; SekEndRun(0); } } +#define SekSetStopS68k(x) { PicoCpuCS68k.state_flags&=~1; if (x) { PicoCpuCS68k.state_flags|=1; SekEndRunS68k(0); } } #define SekIsStoppedM68k() (PicoCpuCM68k.state_flags&1) #define SekIsStoppedS68k() (PicoCpuCS68k.state_flags&1) #define SekShouldInterrupt() (PicoCpuCM68k.irq > (PicoCpuCM68k.srh&7)) @@ -60,7 +59,7 @@ extern struct Cyclone PicoCpuCM68k, PicoCpuCS68k; #endif #ifdef EMU_F68K -#include "../cpu/fame/fame.h" +#include extern M68K_CONTEXT PicoCpuFM68k, PicoCpuFS68k; #define SekCyclesLeft PicoCpuFM68k.io_cycle_counter #define SekCyclesLeftS68k PicoCpuFS68k.io_cycle_counter @@ -72,15 +71,15 @@ extern 
M68K_CONTEXT PicoCpuFM68k, PicoCpuFS68k; #define SekSrS68k PicoCpuFS68k.sr #define SekSetStop(x) { \ PicoCpuFM68k.execinfo &= ~FM68K_HALTED; \ - if (x) { PicoCpuFM68k.execinfo |= FM68K_HALTED; PicoCpuFM68k.io_cycle_counter = 0; } \ + if (x) { PicoCpuFM68k.execinfo |= FM68K_HALTED; SekEndRun(0); } \ } #define SekSetStopS68k(x) { \ PicoCpuFS68k.execinfo &= ~FM68K_HALTED; \ - if (x) { PicoCpuFS68k.execinfo |= FM68K_HALTED; PicoCpuFS68k.io_cycle_counter = 0; } \ + if (x) { PicoCpuFS68k.execinfo |= FM68K_HALTED; SekEndRunS68k(0); } \ } #define SekIsStoppedM68k() (PicoCpuFM68k.execinfo&FM68K_HALTED) #define SekIsStoppedS68k() (PicoCpuFS68k.execinfo&FM68K_HALTED) -#define SekShouldInterrupt() fm68k_would_interrupt() +#define SekShouldInterrupt() fm68k_would_interrupt(&PicoCpuFM68k) #define SekNotPolling PicoCpuFM68k.not_polling #define SekNotPollingS68k PicoCpuFS68k.not_polling @@ -91,7 +90,10 @@ extern M68K_CONTEXT PicoCpuFM68k, PicoCpuFS68k; #endif #ifdef EMU_M68K -#include "../cpu/musashi/m68kcpu.h" +#include +#undef INLINE +#undef USE_CYCLES +#undef ADD_CYCLES extern m68ki_cpu_core PicoCpuMM68k, PicoCpuMS68k; #ifndef SekCyclesLeft #define SekCyclesLeft PicoCpuMM68k.cyc_remaining_cycles @@ -103,51 +105,37 @@ extern m68ki_cpu_core PicoCpuMM68k, PicoCpuMS68k; #define SekSr m68k_get_reg(&PicoCpuMM68k, M68K_REG_SR) #define SekSrS68k m68k_get_reg(&PicoCpuMS68k, M68K_REG_SR) #define SekSetStop(x) { \ - if(x) { SET_CYCLES(0); PicoCpuMM68k.stopped=STOP_LEVEL_STOP; } \ + if(x) { PicoCpuMM68k.stopped=STOP_LEVEL_STOP; SekEndRun(0)} \ else PicoCpuMM68k.stopped=0; \ } #define SekSetStopS68k(x) { \ - if(x) { SET_CYCLES(0); PicoCpuMS68k.stopped=STOP_LEVEL_STOP; } \ + if(x) { PicoCpuMS68k.stopped=STOP_LEVEL_STOP; SekEndRunS68k(0); } \ else PicoCpuMS68k.stopped=0; \ } #define SekIsStoppedM68k() (PicoCpuMM68k.stopped==STOP_LEVEL_STOP) #define SekIsStoppedS68k() (PicoCpuMS68k.stopped==STOP_LEVEL_STOP) -#define SekShouldInterrupt() (CPU_INT_LEVEL > FLAG_INT_MASK) +#define 
SekShouldInterrupt() (PicoCpuMM68k.int_level > PicoCpuMM68k.int_mask) #define SekNotPolling PicoCpuMM68k.not_polling #define SekNotPollingS68k PicoCpuMS68k.not_polling -#define SekInterrupt(irq) { \ - void *oldcontext = m68ki_cpu_p; \ - m68k_set_context(&PicoCpuMM68k); \ - m68k_set_irq(irq); \ - m68k_set_context(oldcontext); \ -} -#define SekIrqLevel (PicoCpuMM68k.int_level >> 8) +// avoid m68k_set_irq() for delaying to work +#define SekInterrupt(irq) PicoCpuMM68k.int_level = (irq) << 8 +#define SekIrqLevel (PicoCpuMM68k.int_level >> 8) #endif #endif // EMU_M68K -// while running, cnt represents target of current timeslice -// while not in SekRun(), it's actual cycles done -// (but always use SekCyclesDone() if you need current position) -// cnt may change if timeslice is ended prematurely or extended, -// so we use SekCycleAim for the actual target -extern unsigned int SekCycleCnt; -extern unsigned int SekCycleAim; - // number of cycles done (can be checked anywhere) -#define SekCyclesDone() (SekCycleCnt - SekCyclesLeft) +#define SekCyclesDone() (Pico.t.m68c_cnt - SekCyclesLeft) // burn cycles while not in SekRun() and while in -#define SekCyclesBurn(c) SekCycleCnt += c -#define SekCyclesBurnRun(c) { \ - SekCyclesLeft -= c; \ -} +#define SekCyclesBurn(c) Pico.t.m68c_cnt += c +#define SekCyclesBurnRun(c) SekCyclesLeft -= c // note: sometimes may extend timeslice to delay an irq #define SekEndRun(after) { \ - SekCycleCnt -= SekCyclesLeft - (after); \ + Pico.t.m68c_cnt -= SekCyclesLeft - (after); \ SekCyclesLeft = after; \ } @@ -174,28 +162,31 @@ extern unsigned int SekCycleAimS68k; // ----------------------- Z80 CPU ----------------------- #if defined(_USE_DRZ80) -#include "../cpu/DrZ80/drz80.h" +#include extern struct DrZ80 drZ80; #define z80_run(cycles) ((cycles) - DrZ80Run(&drZ80, cycles)) #define z80_run_nr(cycles) DrZ80Run(&drZ80, cycles) #define z80_int() drZ80.Z80_IRQ = 1 -#define z80_int() drZ80.Z80_IRQ = 1 +#define z80_int_assert(a) drZ80.Z80_IRQ = (a ? 
2 : 0) #define z80_nmi() drZ80.Z80IF |= 8 #define z80_cyclesLeft drZ80.cycles +#define z80_subCLeft(c) drZ80.cycles -= c #define z80_pc() (drZ80.Z80PC - drZ80.Z80PC_BASE) #elif defined(_USE_CZ80) -#include "../cpu/cz80/cz80.h" +#include #define z80_run(cycles) Cz80_Exec(&CZ80, cycles) #define z80_run_nr(cycles) Cz80_Exec(&CZ80, cycles) #define z80_int() Cz80_Set_IRQ(&CZ80, 0, HOLD_LINE) -#define z80_nmi() Cz80_Set_IRQ(&CZ80, IRQ_LINE_NMI, 0) +#define z80_int_assert(a) Cz80_Set_IRQ(&CZ80, 0, (a) ? ASSERT_LINE : CLEAR_LINE) +#define z80_nmi() Cz80_Set_IRQ(&CZ80, IRQ_LINE_NMI, ASSERT_LINE) #define z80_cyclesLeft (CZ80.ICount - CZ80.ExtraCycles) +#define z80_subCLeft(c) CZ80.ICount -= c #define z80_pc() Cz80_Get_Reg(&CZ80, CZ80_PC) #else @@ -203,30 +194,28 @@ extern struct DrZ80 drZ80; #define z80_run(cycles) (cycles) #define z80_run_nr(cycles) #define z80_int() +#define z80_int_assert(a) #define z80_nmi() #endif #define Z80_STATE_SIZE 0x60 -extern unsigned int last_z80_sync; -extern int z80_cycle_cnt; /* 'done' z80 cycles before z80_run() */ -extern int z80_cycle_aim; -extern int z80_scanline; -extern int z80_scanline_cycles; /* cycles done until z80_scanline */ - -#define z80_resetCycles() \ - last_z80_sync = SekCyclesDone(); \ - z80_cycle_cnt = z80_cycle_aim = z80_scanline = z80_scanline_cycles = 0; +#define z80_resetCycles() { \ + Pico.t.z80c_cnt -= Pico.t.z80c_aim, Pico.t.z80c_aim = Pico.t.z80_scanline = 0; \ + if (!Pico.m.z80Run || Pico.m.z80_reset) Pico.t.z80c_cnt = 0; \ +} #define z80_cyclesDone() \ - (z80_cycle_aim - z80_cyclesLeft) + (Pico.t.z80c_aim - z80_cyclesLeft) -#define cycles_68k_to_z80(x) ((x)*957 >> 11) +// 68k clock = OSC/7, z80 clock = OSC/15, 68k:z80 ratio = 7/15 = 3822.9/8192 +#define cycles_68k_to_z80(x) ((x) * 3823 >> 13) +#define cycles_z80_to_68k(x) ((x) * 8777 >> 12) // ----------------------- SH2 CPU ----------------------- -#include "cpu/sh2/sh2.h" +#include extern SH2 sh2s[2]; #define msh2 sh2s[0] @@ -242,30 +231,33 @@ extern SH2 
sh2s[2]; # define sh2_cycles_left(sh2) (sh2)->icount # define sh2_burn_cycles(sh2, n) (sh2)->icount -= n # define sh2_pc(sh2) (sh2)->ppc +# define sh2_not_polling(sh2) (sh2)->no_polling +# define sh2_set_polling(sh2) (sh2)->no_polling = 0 #else # define sh2_end_run(sh2, after_) do { \ - int left_ = (signed int)(sh2)->sr >> 12; \ - if (left_ > (after_)) { \ - (sh2)->cycles_timeslice -= left_ - (after_); \ - (sh2)->sr &= 0xfff; \ - (sh2)->sr |= (after_) << 12; \ + int left_ = ((signed int)(sh2)->sr >> 12) - (after_); \ + if (left_ > 0) { \ + (sh2)->cycles_timeslice -= left_; \ + (sh2)->sr -= (left_ << 12); \ } \ } while (0) # define sh2_cycles_left(sh2) ((signed int)(sh2)->sr >> 12) # define sh2_burn_cycles(sh2, n) (sh2)->sr -= ((n) << 12) # define sh2_pc(sh2) (sh2)->pc +# define sh2_not_polling(sh2) ((sh2)->sr & SH2_NO_POLLING) +# define sh2_set_polling(sh2) ((sh2)->sr &= ~SH2_NO_POLLING) #endif -#define sh2_cycles_done(sh2) ((int)(sh2)->cycles_timeslice - sh2_cycles_left(sh2)) +#define sh2_cycles_done(sh2) (unsigned)((int)(sh2)->cycles_timeslice - sh2_cycles_left(sh2)) #define sh2_cycles_done_t(sh2) \ - ((sh2)->m68krcycles_done * 3 + sh2_cycles_done(sh2)) + (unsigned)(C_M68K_TO_SH2(sh2, (sh2)->m68krcycles_done) + sh2_cycles_done(sh2)) #define sh2_cycles_done_m68k(sh2) \ - ((sh2)->m68krcycles_done + (sh2_cycles_done(sh2) / 3)) + (unsigned)((sh2)->m68krcycles_done + C_SH2_TO_M68K(sh2, sh2_cycles_done(sh2))) -#define sh2_reg(c, x) (c) ? ssh2.r[x] : msh2.r[x] -#define sh2_gbr(c) (c) ? ssh2.gbr : msh2.gbr -#define sh2_vbr(c) (c) ? ssh2.vbr : msh2.vbr -#define sh2_sr(c) (((c) ? ssh2.sr : msh2.sr) & 0xfff) +#define sh2_reg(c, x) ((c) ? ssh2.r[x] : msh2.r[x]) +#define sh2_gbr(c) ((c) ? ssh2.gbr : msh2.gbr) +#define sh2_vbr(c) ((c) ? ssh2.vbr : msh2.vbr) +#define sh2_sr(c) (((c) ? 
ssh2.sr : msh2.sr) & 0xfff) #define sh2_set_gbr(c, v) \ { if (c) ssh2.gbr = v; else msh2.gbr = v; } @@ -281,6 +273,36 @@ extern SH2 sh2s[2]; #define OSC_NTSC 53693100 #define OSC_PAL 53203424 +// PicoVideo.debug_p +#define PVD_KILL_A (1 << 0) +#define PVD_KILL_B (1 << 1) +#define PVD_KILL_S_LO (1 << 2) +#define PVD_KILL_S_HI (1 << 3) +#define PVD_KILL_32X (1 << 4) +#define PVD_FORCE_A (1 << 5) +#define PVD_FORCE_B (1 << 6) +#define PVD_FORCE_S (1 << 7) + +// PicoVideo.status, not part of real SR +#define SR_PAL (1 << 0) +#define SR_DMA (1 << 1) +#define SR_HB (1 << 2) +#define SR_VB (1 << 3) +#define SR_ODD (1 << 4) +#define SR_C (1 << 5) +#define SR_SOVR (1 << 6) +#define SR_F (1 << 7) +#define SR_FULL (1 << 8) +#define SR_EMPT (1 << 9) +// not part of real SR +#define PVS_ACTIVE (1 << 16) +#define PVS_VB2 (1 << 17) // ignores forced blanking +#define PVS_CPUWR (1 << 18) // CPU write blocked by FIFO full +#define PVS_CPURD (1 << 19) // CPU read blocked by FIFO not empty +#define PVS_DMAFILL (1 << 20) // DMA fill is waiting for fill data +#define PVS_DMABG (1 << 21) // background DMA operation is running +#define PVS_FIFORUN (1 << 22) // FIFO is processing + struct PicoVideo { unsigned char reg[0x20]; @@ -288,11 +310,18 @@ struct PicoVideo unsigned char pending; // 1 if waiting for second half of 32-bit command unsigned char type; // Command type (v/c/vsram read/write) unsigned short addr; // Read/Write address - int status; // Status bits + unsigned int status; // Status bits (SR) and extra flags unsigned char pending_ints; // pending interrupts: ??VH???? - signed char lwrite_cnt; // VDP write count during active display line + signed char pad1; // was VDP write count unsigned short v_counter; // V-counter - unsigned char pad[0x10]; + unsigned short debug; // raw debug register + unsigned char debug_p; // ... 
parsed: PVD_* + unsigned char addr_u; // bit16 of .addr + unsigned char hint_cnt; + unsigned char hint_irq; // irq# of HINT (4 on MD, 5 on Pico) + unsigned short hv_latch; // latched hvcounter value + signed int fifo_cnt; // pending xfers for blocking FIFO queue entries + signed int fifo_bgcnt; // pending xfers for background FIFO queue entries }; struct PicoMisc @@ -314,22 +343,68 @@ struct PicoMisc unsigned char eeprom_cycle; // EEPROM cycle number unsigned char eeprom_slave; // EEPROM slave word for X24C02 and better SRAMs unsigned char eeprom_status; - unsigned char pad2; - unsigned short dma_xfers; // 18 + unsigned char pad1; // was ym2612 status + unsigned short dma_xfers; // 18 unused (was VDP DMA transfer count) unsigned char eeprom_wb[2]; // EEPROM latch/write buffer unsigned int frame_count; // 1c for movies and idle det }; +#define PMS_HW_LCD 0x2 // GG LCD +#define PMS_HW_JAP 0x4 // japanese system +#define PMS_HW_FM 0x8 // FM sound +#define PMS_HW_TMS 0x10 // assume TMS9918 +#define PMS_HW_FMUSED 0x80 // FM sound accessed + +#define PMS_MAP_AUTO 0 +#define PMS_MAP_SEGA 1 +#define PMS_MAP_CODEM 2 +#define PMS_MAP_KOREA 3 +#define PMS_MAP_MSX 4 +#define PMS_MAP_N32K 5 +#define PMS_MAP_N16K 6 +#define PMS_MAP_JANGGUN 7 +#define PMS_MAP_NEMESIS 8 +#define PMS_MAP_8KBRAM 9 +#define PMS_MAP_XOR 10 +#define PMS_MAP_32KBRAM 11 + struct PicoMS { unsigned char carthw[0x10]; unsigned char io_ctl; unsigned char nmi_state; - unsigned char pad[0x4e]; + unsigned char mapper; + unsigned char fm_ctl; + unsigned char vdp_buffer; + unsigned char vdp_hlatch; + unsigned char io_gg[0x08]; + unsigned char mapcnt; + unsigned char io_sg; + unsigned char pad[0x40]; }; -// some assembly stuff depend on these, do not touch! 
-struct Pico +// emu state and data for the asm code +struct PicoEState +{ + int DrawScanline; + int rendstatus; + void *DrawLineDest; // draw destination + int DrawLineDestIncr; + unsigned char *HighCol; + s32 *HighPreSpr; + struct Pico *Pico; + unsigned short *PicoMem_vram; + unsigned short *PicoMem_cram; + unsigned int *PicoOpt; + unsigned char *Draw2FB; + int Draw2Width; + int Draw2Start; + unsigned short HighPal[0x100]; + unsigned short SonicPal[0x100]; + int SonicPalCount; +}; + +struct PicoMem { unsigned char ram[0x10000]; // 0x00000 scratch ram union { // vram is byteswapped for easier reads when drawing @@ -338,16 +413,9 @@ struct Pico }; unsigned char zram[0x2000]; // 0x20000 Z80 ram unsigned char ioports[0x10]; // XXX: fix asm and mv - unsigned char pad[0xf0]; // unused - unsigned short cram[0x40]; // 0x22100 - unsigned short vsram[0x40]; // 0x22180 - - unsigned char *rom; // 0x22200 - unsigned int romsize; // 0x22204 (on 32bits) - - struct PicoMisc m; - struct PicoVideo video; - struct PicoMS ms; + unsigned short cram[0x40]; // 0x22010 + unsigned char pad[0x70]; // 0x22050 DrawStripVSRam reads 0 from here + unsigned short vsram[0x40]; // 0x22100 }; // sram @@ -357,7 +425,7 @@ struct Pico #define SRF_ENABLED (1 << 0) #define SRF_EEPROM (1 << 1) -struct PicoSRAM +struct PicoCartSave { unsigned char *data; // actual data unsigned int start; // start address in 68k address space @@ -373,10 +441,68 @@ struct PicoSRAM unsigned int size; }; -// MCD -#include "cd/cd_sys.h" -#include "cd/LC89510.h" +struct PicoTiming +{ + // while running, cnt represents target of current timeslice + // while not in SekRun(), it's actual cycles done + // (but always use SekCyclesDone() if you need current position) + // _cnt may change if timeslice is ended prematurely or extended, + // so we use _aim for the actual target + unsigned int m68c_cnt; + unsigned int m68c_aim; + unsigned int m68c_frame_start; // m68k cycles + unsigned int m68c_line_start; + int refresh_delay; + 
unsigned int z80c_cnt; // z80 cycles done (this frame) + unsigned int z80c_aim; + unsigned int z80c_line_start; + int z80_scanline; + int z80_buscycles; + int z80_busdelay; + + int timer_a_next_oflow, timer_a_step; // in z80 cycles + int timer_b_next_oflow, timer_b_step; + int ym2612_busy; + + int vcnt_wrap, vcnt_adj; +}; + +struct PicoSound +{ + short len; // number of mono samples + short len_use; // adjusted + int len_e_add; // for non-int samples/frame + int len_e_cnt; + unsigned int clkl_mult; // z80 clocks per line in Q20 + unsigned int smpl_mult; // samples per line in Q16 + unsigned int cdda_mult, cdda_div; // 44.1 KHz resampling factor in Q16 + short dac_val, dac_val2; // last DAC sample + unsigned int dac_pos; // last DAC position in Q20 + unsigned int fm_pos; // last FM position in Q20 + unsigned int psg_pos; // last PSG position in Q16 + unsigned int ym2413_pos; // last YM2413 position + unsigned int pcm_pos; // last PCM position in Q16 + unsigned int fm_fir_mul, fm_fir_div; // ratio for FM resampling FIR +}; + +// run tools/mkoffsets pico/pico_int_offs.h if you change these +// careful with savestate compat +struct Pico +{ + struct PicoVideo video; + struct PicoMisc m; + struct PicoTiming t; + struct PicoCartSave sv; + struct PicoSound snd; + struct PicoEState est; + struct PicoMS ms; + + unsigned char *rom; + unsigned int romsize; +}; + +// MCD #define PCM_MIXBUF_LEN ((12500000 / 384) / 50 + 1) struct mcd_pcm @@ -395,65 +521,67 @@ struct mcd_pcm } ch[8]; }; -#define PCD_ST_S68K_RST 1 +#define PCD_ST_S68K_RST 1 +#define PCD_ST_S68K_SYNC 2 +#define PCD_ST_S68K_SLEEP 4 +#define PCD_ST_S68K_POLL 16 +#define PCD_ST_M68K_POLL 32 +#define PCD_ST_CDD_CMD 64 +#define PCD_ST_S68K_IFL2 0x100 struct mcd_misc { - unsigned short hint_vector; - unsigned char busreq; // not s68k_regs[1] - unsigned char s68k_pend_ints; - unsigned int state_flags; // 04 - unsigned int stopwatch_base_c; - unsigned short m68k_poll_a; - unsigned short m68k_poll_cnt; - unsigned short 
s68k_poll_a; - unsigned short s68k_poll_cnt; - unsigned int s68k_poll_clk; - unsigned char bcram_reg; // 18: battery-backed RAM cart register - unsigned char dmna_ret_2m; - unsigned short pad3; - int pad4[9]; + unsigned short hint_vector; + unsigned char busreq; // not s68k_regs[1] + unsigned char s68k_pend_ints; + unsigned int state_flags; // 04 + unsigned int stopwatch_base_c; + unsigned short m68k_poll_a; + unsigned short m68k_poll_cnt; + unsigned short s68k_poll_a; // 10 + unsigned short s68k_poll_cnt; + unsigned int s68k_poll_clk; + unsigned char bcram_reg; // 18: battery-backed RAM cart register + unsigned char dmna_ret_2m; + unsigned char need_sync; + unsigned char pad3; + unsigned int m68k_poll_clk; + int pad4[8]; }; typedef struct { - unsigned char bios[0x20000]; // 000000: 128K - union { // 020000: 512K - unsigned char prg_ram[0x80000]; - unsigned char prg_ram_b[4][0x20000]; - }; - union { // 0a0000: 256K - struct { - unsigned char word_ram2M[0x40000]; - unsigned char unused0[0x20000]; - }; - struct { - unsigned char unused1[0x20000]; - unsigned char word_ram1M[2][0x20000]; - }; - }; - union { // 100000: 64K - unsigned char pcm_ram[0x10000]; - unsigned char pcm_ram_b[0x10][0x1000]; - }; - // FIXME: should be short - unsigned char s68k_regs[0x200]; // 110000: GA, not CPU regs - unsigned char bram[0x2000]; // 110200: 8K - struct mcd_misc m; // 112200: misc - struct mcd_pcm pcm; // 112240: - _scd_toc TOC; // not to be saved - CDD cdd; - CDC cdc; - _scd scd; - int pcm_mixbuf[PCM_MIXBUF_LEN * 2]; - int pcm_mixpos; - char pcm_mixbuf_dirty; - char pcm_regs_dirty; + unsigned char bios[0x20000]; // 000000: 128K + union { // 020000: 512K + unsigned char prg_ram[0x80000]; + unsigned char prg_ram_b[4][0x20000]; + }; + union { // 0a0000: 256K + struct { + unsigned char word_ram2M[0x40000]; + unsigned char unused0[0x20000]; + }; + struct { + unsigned char unused1[0x20000]; + unsigned char word_ram1M[2][0x20000]; + }; + }; + union { // 100000: 64K + unsigned char 
pcm_ram[0x10000]; + unsigned char pcm_ram_b[0x10][0x1000]; + }; + unsigned char s68k_regs[0x200]; // 110000: GA, not CPU regs + unsigned char bram[0x2000]; // 110200: 8K + struct mcd_misc m; // 112200: misc + struct mcd_pcm pcm; // 112240: + void *cdda_stream; + int cdda_type; + int pcm_mixbuf[PCM_MIXBUF_LEN * 2]; + int pcm_mixpos; + char pcm_mixbuf_dirty; + char pcm_regs_dirty; } mcd_state; -// XXX: this will need to be reworked for cart+cd support. -#define Pico_mcd ((mcd_state *)Pico.rom) - // 32X #define P32XS_FM (1<<15) #define P32XS_nCART (1<< 8) @@ -485,6 +613,7 @@ typedef struct #define P32XF_68KCPOLL (1 << 0) #define P32XF_68KVPOLL (1 << 1) #define P32XF_Z80_32X_IO (1 << 7) // z80 does 32x io +#define P32XF_DRC_ROM_C (1 << 8) // cached code from ROM #define P32XI_VRES (1 << 14/2) // IRL/2 #define P32XI_VINT (1 << 12/2) @@ -492,8 +621,8 @@ typedef struct #define P32XI_CMD (1 << 8/2) #define P32XI_PWM (1 << 6/2) -// peripheral reg access -#define PREG8(regs,offs) ((unsigned char *)regs)[offs ^ 3] +// peripheral reg access (32 bit regs) +#define PREG8(regs,offs) ((unsigned char *)regs)[MEM_BE4(offs)] #define DMAC_FIFO_LEN (4*2) #define PWM_BUFF_LEN 1024 // in one channel samples @@ -514,26 +643,28 @@ struct Pico32x unsigned int emu_flags; unsigned char sh2irq_mask[2]; unsigned char sh2irqi[2]; // individual - unsigned int sh2irqs; // common irqs + unsigned int pad4; // was sh2irqs unsigned short dmac_fifo[DMAC_FIFO_LEN]; unsigned int pad[4]; unsigned int dmac0_fifo_ptr; unsigned short vdp_fbcr_fake; unsigned short pad2; - unsigned char comm_dirty_68k; - unsigned char comm_dirty_sh2; + unsigned char comm_dirty; + unsigned char pad3; // was comm_dirty_sh2 unsigned char pwm_irq_cnt; unsigned char pad1; unsigned short pwm_p[2]; // pwm pos in fifo unsigned int pwm_cycle_p; // pwm play cursor (32x cycles) - unsigned int reserved[6]; + unsigned int hint_counter; + unsigned int reserved[5]; }; struct Pico32xMem { unsigned char sdram[0x40000]; #ifdef DRC_SH2 - 
unsigned short drcblk_ram[1 << (18 - SH2_DRCBLK_RAM_SHIFT)]; + unsigned char drcblk_ram[1 << (18 - SH2_DRCBLK_RAM_SHIFT)]; + unsigned char drclit_ram[1 << (18 - SH2_DRCBLK_RAM_SHIFT)]; #endif unsigned short dram[2][0x20000/2]; // AKA fb union { @@ -541,7 +672,8 @@ struct Pico32xMem unsigned char m68k_rom_bank[0x10000]; // M68K_BANK_SIZE }; #ifdef DRC_SH2 - unsigned short drcblk_da[2][1 << (12 - SH2_DRCBLK_DA_SHIFT)]; + unsigned char drcblk_da[2][1 << (12 - SH2_DRCBLK_DA_SHIFT)]; + unsigned char drclit_da[2][1 << (12 - SH2_DRCBLK_DA_SHIFT)]; #endif union { unsigned char b[0x800]; @@ -554,8 +686,8 @@ struct Pico32xMem unsigned short pal[0x100]; unsigned short pal_native[0x100]; // converted to native (for renderer) signed short pwm[2*PWM_BUFF_LEN]; // PWM buffer for current frame - signed short pwm_current[2]; // current converted samples unsigned short pwm_fifo[2][4]; // [0] - current raw, others - fifo entries + unsigned pwm_index[2]; // ringbuffer index for pwm_fifo }; // area.c @@ -579,64 +711,98 @@ extern void (*PicoCartUnloadHook)(void); int CM_compareRun(int cyc, int is_sub); // draw.c +void PicoDrawInit(void); PICO_INTERNAL void PicoFrameStart(void); -void PicoDrawSync(int to, int blank_last_line); -void BackFill(int reg7, int sh); -void FinalizeLine555(int sh, int line); +void PicoDrawRefreshSprites(void); +void PicoDrawBgcDMA(u16 *base, u32 source, u32 mask, int len, int sl); +void PicoDrawSync(int to, int blank_last_line, int limit_sprites); +void BackFill(int reg7, int sh, struct PicoEState *est); +void FinalizeLine555(int sh, int line, struct PicoEState *est); +void FinalizeLine8bit(int sh, int line, struct PicoEState *est); +void PicoDrawSetOutBufMD(void *dest, int increment); extern int (*PicoScanBegin)(unsigned int num); extern int (*PicoScanEnd)(unsigned int num); -extern int DrawScanline; -#define MAX_LINE_SPRITES 29 -extern unsigned char HighLnSpr[240][3 + MAX_LINE_SPRITES]; +#define MAX_LINE_SPRITES 27 // +1 last sprite width, +4 hdr; total 32 
+extern unsigned char HighLnSpr[240][4+MAX_LINE_SPRITES+1]; +extern unsigned char *HighColBase; +extern int HighColIncrement; extern void *DrawLineDestBase; extern int DrawLineDestIncrement; +extern u32 VdpSATCache[2*128]; // draw2.c -PICO_INTERNAL void PicoFrameFull(); +void PicoDraw2SetOutBuf(void *dest, int incr); +void PicoDraw2Init(void); +PICO_INTERNAL void PicoFrameFull(void); // mode4.c -void PicoFrameStartMode4(void); -void PicoLineMode4(int line); -void PicoDoHighPal555M4(void); -void PicoDrawSetOutputMode4(pdso_t which); +void PicoFrameStartSMS(void); +void PicoParseSATSMS(int line); +void PicoLineSMS(int line); +void PicoDoHighPal555SMS(void); +void PicoDrawSetOutputSMS(pdso_t which); // memory.c PICO_INTERNAL void PicoMemSetup(void); -unsigned int PicoRead8_io(unsigned int a); -unsigned int PicoRead16_io(unsigned int a); -void PicoWrite8_io(unsigned int a, unsigned int d); -void PicoWrite16_io(unsigned int a, unsigned int d); +PICO_INTERNAL u32 PicoRead16_floating(u32 a); +u32 PicoRead8_io(u32 a); +u32 PicoRead16_io(u32 a); +void PicoWrite8_io(u32 a, u32 d); +void PicoWrite16_io(u32 a, u32 d); // pico/memory.c PICO_INTERNAL void PicoMemSetupPico(void); +// cd/cdc.c +void cdc_init(void); +void cdc_reset(void); +int cdc_context_save(unsigned char *state); +int cdc_context_load(unsigned char *state); +int cdc_context_load_old(unsigned char *state); +void cdc_dma_update(void); +int cdc_decoder_update(unsigned char header[4]); +void cdc_reg_w(unsigned char data); +unsigned char cdc_reg_r(void); +unsigned short cdc_host_r(void); + +// cd/cdd.c +void cdd_reset(void); +int cdd_context_save(unsigned char *state); +int cdd_context_load(unsigned char *state); +int cdd_context_load_old(unsigned char *state); +void cdd_read_data(unsigned char *dst); +void cdd_read_audio(unsigned int samples); +void cdd_update(void); +void cdd_process(void); + +// cd/cd_image.c +int load_cd_image(const char *cd_img_name, int *type); + // cd/gfx.c void gfx_init(void); -void 
gfx_start(unsigned int base); +void gfx_start(u32 base); void gfx_update(unsigned int cycles); int gfx_context_save(unsigned char *state); int gfx_context_load(const unsigned char *state); // cd/gfx_dma.c -void DmaSlowCell(unsigned int source, unsigned int a, int len, unsigned char inc); +void DmaSlowCell(u32 source, u32 a, int len, unsigned char inc); // cd/memory.c +extern u32 pcd_base_address; PICO_INTERNAL void PicoMemSetupCD(void); -unsigned int PicoRead8_mcd_io(unsigned int a); -unsigned int PicoRead16_mcd_io(unsigned int a); -void PicoWrite8_mcd_io(unsigned int a, unsigned int d); -void PicoWrite16_mcd_io(unsigned int a, unsigned int d); +u32 PicoRead8_mcd_io(u32 a); +u32 PicoRead16_mcd_io(u32 a); +void PicoWrite8_mcd_io(u32 a, u32 d); +void PicoWrite16_mcd_io(u32 a, u32 d); void pcd_state_loaded_mem(void); // pico.c extern struct Pico Pico; -extern struct PicoSRAM SRam; -extern int PicoPadInt[2]; -extern int emustatus; -extern int scanlines_total; +extern struct PicoMem PicoMem; extern void (*PicoResetHook)(void); extern void (*PicoLineHook)(void); -PICO_INTERNAL int CheckDMA(void); +PICO_INTERNAL int CheckDMA(int cycles); PICO_INTERNAL void PicoDetectRegion(void); PICO_INTERNAL void PicoSyncZ80(unsigned int m68k_cycles_done); @@ -648,11 +814,14 @@ PICO_INTERNAL void PicoSyncZ80(unsigned int m68k_cycles_done); #define PCDS_IEN5 (1<<5) #define PCDS_IEN6 (1<<6) +extern mcd_state *Pico_mcd; + PICO_INTERNAL void PicoInitMCD(void); PICO_INTERNAL void PicoExitMCD(void); PICO_INTERNAL void PicoPowerMCD(void); PICO_INTERNAL int PicoResetMCD(void); PICO_INTERNAL void PicoFrameMCD(void); +PICO_INTERNAL void PicoMCDPrepare(void); enum pcd_event { PCD_EVENT_CDC, @@ -662,9 +831,12 @@ enum pcd_event { PCD_EVENT_COUNT, }; extern unsigned int pcd_event_times[PCD_EVENT_COUNT]; + void pcd_event_schedule(unsigned int now, enum pcd_event event, int after); void pcd_event_schedule_s68k(enum pcd_event event, int after); +void pcd_prepare_frame(void); unsigned int 
pcd_cycles_m68k_to_s68k(unsigned int c); +void pcd_irq_s68k(int irq, int state); int pcd_sync_s68k(unsigned int m68k_target, int m68k_poll_sync); void pcd_run_cpus(int m68k_cycles); void pcd_soft_reset(void); @@ -672,18 +844,26 @@ void pcd_state_loaded(void); // cd/pcm.c void pcd_pcm_sync(unsigned int to); -void pcd_pcm_update(int *buffer, int length, int stereo); +void pcd_pcm_update(s32 *buffer, int length, int stereo); void pcd_pcm_write(unsigned int a, unsigned int d); unsigned int pcd_pcm_read(unsigned int a); // pico/pico.c PICO_INTERNAL void PicoInitPico(void); PICO_INTERNAL void PicoReratePico(void); +PICO_INTERNAL int PicoPicoIrqAck(int level); // pico/xpcm.c PICO_INTERNAL void PicoPicoPCMUpdate(short *buffer, int length, int stereo); -PICO_INTERNAL void PicoPicoPCMReset(void); -PICO_INTERNAL void PicoPicoPCMRerate(int xpcm_rate); +PICO_INTERNAL void PicoPicoPCMResetN(int pin); +PICO_INTERNAL void PicoPicoPCMStartN(int pin); +PICO_INTERNAL int PicoPicoPCMBusyN(void); +PICO_INTERNAL void PicoPicoPCMGain(int gain); +PICO_INTERNAL void PicoPicoPCMFilter(int index); +PICO_INTERNAL void PicoPicoPCMIrqEn(int enable); +PICO_INTERNAL void PicoPicoPCMRerate(void); +PICO_INTERNAL int PicoPicoPCMSave(void *buffer, int length); +PICO_INTERNAL void PicoPicoPCMLoad(void *buffer, int length); // sek.c PICO_INTERNAL void SekInit(void); @@ -705,50 +885,95 @@ void SekTrace(int is_s68k); PICO_INTERNAL void SekInitS68k(void); PICO_INTERNAL int SekResetS68k(void); PICO_INTERNAL int SekInterruptS68k(int irq); +void SekInterruptClearS68k(int irq); // sound/sound.c -PICO_INTERNAL void cdda_start_play(); extern short cdda_out_buffer[2*1152]; -extern int PsndLen_exc_cnt; -extern int PsndLen_exc_add; -extern int timer_a_next_oflow, timer_a_step; // in z80 cycles -extern int timer_b_next_oflow, timer_b_step; + +void cdda_start_play(int lba_base, int lba_offset, int lb_len); + +#define YM2612_NATIVE_RATE() (((Pico.m.pal?OSC_PAL:OSC_NTSC)/7 + 3*24) / (6*24)) void ym2612_sync_timers(int 
z80_cycles, int mode_old, int mode_new); void ym2612_pack_state(void); void ym2612_unpack_state(void); #define TIMER_NO_OFLOW 0x70000000 -// tA = 72 * (1024 - NA) / M -#define TIMER_A_TICK_ZCYCLES 17203 -// tB = 1152 * (256 - NA) / M -#define TIMER_B_TICK_ZCYCLES 262800 // 275251 broken, see Dai Makaimura -#define timers_cycle() \ - if (timer_a_next_oflow > 0 && timer_a_next_oflow < TIMER_NO_OFLOW) \ - timer_a_next_oflow -= Pico.m.pal ? 70938*256 : 59659*256; \ - if (timer_b_next_oflow > 0 && timer_b_next_oflow < TIMER_NO_OFLOW) \ - timer_b_next_oflow -= Pico.m.pal ? 70938*256 : 59659*256; \ +// tA = 24*3 * (1024 - TA) / M, with M = mclock/2 +#define TIMER_A_TICK_ZCYCLES cycles_68k_to_z80(256LL* 24*3*2) // Q8 +// tB = 16*24*3 * ( 256 - TB) / M +#define TIMER_B_TICK_ZCYCLES cycles_68k_to_z80(256LL*16*24*3*2) // Q8 +// busy = 32*3 / M +#define YMBUSY_ZCYCLES cycles_68k_to_z80(256LL* 32*3*2) // Q8 + +#define timers_cycle(ticks) \ + if (Pico.t.ym2612_busy > 0) \ + Pico.t.ym2612_busy -= ticks << 8; \ + if (Pico.t.timer_a_next_oflow < TIMER_NO_OFLOW) \ + Pico.t.timer_a_next_oflow -= ticks << 8; \ + if (Pico.t.timer_b_next_oflow < TIMER_NO_OFLOW) \ + Pico.t.timer_b_next_oflow -= ticks << 8; \ ym2612_sync_timers(0, ym2612.OPN.ST.mode, ym2612.OPN.ST.mode); #define timers_reset() \ - timer_a_next_oflow = timer_b_next_oflow = TIMER_NO_OFLOW; \ - timer_a_step = TIMER_A_TICK_ZCYCLES * 1024; \ - timer_b_step = TIMER_B_TICK_ZCYCLES * 256; + Pico.t.ym2612_busy = 0; \ + Pico.t.timer_a_next_oflow = Pico.t.timer_b_next_oflow = TIMER_NO_OFLOW; \ + Pico.t.timer_a_step = TIMER_A_TICK_ZCYCLES * 1024; \ + Pico.t.timer_b_step = TIMER_B_TICK_ZCYCLES * 256; \ + ym2612.OPN.ST.status &= ~3; +void *YM2413GetRegs(void); +void YM2413UnpackState(void); // videoport.c -extern int line_base_cycles; -PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d); -PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a); -PICO_INTERNAL_ASM unsigned int PicoVideoRead8(unsigned int a); 
-extern int (*PicoDmaHook)(unsigned int source, int len, unsigned short **srcp, unsigned short **limitp); +extern u32 SATaddr, SATmask; +static __inline void UpdateSAT(u32 a, u32 d) +{ + unsigned num = (a^SATaddr) >> 3; + + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + ((u16 *)&VdpSATCache[2*num])[(a&7) >> 1] = d; +} +static __inline void VideoWriteVRAM(u32 a, u16 d) +{ + PicoMem.vram [(u16)a >> 1] = d; + + if (((a^SATaddr) & SATmask) == 0) + UpdateSAT(a, d); +} + +static __inline u8 PicoVideoGetV(int scanline, int maywrap) +{ + if (maywrap && scanline >= Pico.t.vcnt_wrap) scanline -= Pico.t.vcnt_adj; + if ((Pico.video.reg[12]&6) == 6) scanline = (scanline<<1) | 1; + return scanline; +} + +PICO_INTERNAL_ASM void PicoVideoWrite(u32 a,unsigned short d); +PICO_INTERNAL_ASM u32 PicoVideoRead(u32 a); +unsigned char PicoVideoRead8DataH(int is_from_z80); +unsigned char PicoVideoRead8DataL(int is_from_z80); +unsigned char PicoVideoRead8CtlH(int is_from_z80); +unsigned char PicoVideoRead8CtlL(int is_from_z80); +unsigned char PicoVideoRead8HV_H(int is_from_z80); +unsigned char PicoVideoRead8HV_L(int is_from_z80); +extern int (*PicoDmaHook)(u32 source, int len, unsigned short **base, u32 *mask); +void PicoVideoFIFOSync(int cycles); +int PicoVideoFIFOHint(void); +void PicoVideoFIFOMode(int active, int h40); +int PicoVideoFIFOWrite(int count, int byte_p, unsigned sr_mask, unsigned sr_flags); +void PicoVideoInit(void); +void PicoVideoReset(void); +void PicoVideoSync(int skip); +void PicoVideoSave(void); +void PicoVideoLoad(void); +void PicoVideoCacheSAT(int load); // misc.c -PICO_INTERNAL_ASM void memcpy16(unsigned short *dest, unsigned short *src, int count); PICO_INTERNAL_ASM void memcpy16bswap(unsigned short *dest, void *src, int count); -PICO_INTERNAL_ASM void memcpy32(int *dest, int *src, int count); // 32bit word count -PICO_INTERNAL_ASM void memset32(int *dest, int c, int count); +PICO_INTERNAL_ASM void memset32(void *dest, int c, int count); +PICO_INTERNAL_ASM void 
memset32_uncached(int *dest, int c, int count); // eeprom.c void EEPROM_write8(unsigned int a, unsigned int d); @@ -766,16 +991,19 @@ PICO_INTERNAL void z80_exit(void); PICO_INTERNAL_ASM void wram_2M_to_1M(unsigned char *m); PICO_INTERNAL_ASM void wram_1M_to_2M(unsigned char *m); -// cd/buffering.c -PICO_INTERNAL void PicoCDBufferRead(void *dest, int lba); - // sound/sound.c +PICO_INTERNAL void PsndInit(void); +PICO_INTERNAL void PsndExit(void); PICO_INTERNAL void PsndReset(void); -PICO_INTERNAL void PsndDoDAC(int line_to); +PICO_INTERNAL void PsndStartFrame(void); +PICO_INTERNAL void PsndDoDAC(int cycle_to); +PICO_INTERNAL void PsndDoPSG(int cyc_to); +PICO_INTERNAL void PsndDoYM2413(int cyc_to); +PICO_INTERNAL void PsndDoFM(int cyc_to); +PICO_INTERNAL void PsndDoPCM(int cyc_to); PICO_INTERNAL void PsndClear(void); PICO_INTERNAL void PsndGetSamples(int y); -PICO_INTERNAL void PsndGetSamplesMS(void); -extern int PsndDacLine; +PICO_INTERNAL void PsndGetSamplesMS(int y); // sms.c #ifndef NO_SMS @@ -809,34 +1037,49 @@ void Pico32xInit(void); void PicoPower32x(void); void PicoReset32x(void); void Pico32xStartup(void); +void Pico32xShutdown(void); void PicoUnload32x(void); void PicoFrame32x(void); void Pico32xStateLoaded(int is_early); +void Pico32xPrepare(void); void p32x_sync_sh2s(unsigned int m68k_target); void p32x_sync_other_sh2(SH2 *sh2, unsigned int m68k_target); -void p32x_update_irls(SH2 *active_sh2, int m68k_cycles); -void p32x_trigger_irq(SH2 *sh2, int m68k_cycles, unsigned int mask); -void p32x_update_cmd_irq(SH2 *sh2, int m68k_cycles); +void p32x_update_irls(SH2 *active_sh2, unsigned int m68k_cycles); +void p32x_trigger_irq(SH2 *sh2, unsigned int m68k_cycles, unsigned int mask); +void p32x_update_cmd_irq(SH2 *sh2, unsigned int m68k_cycles); void p32x_reset_sh2s(void); void p32x_event_schedule(unsigned int now, enum p32x_event event, int after); void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after); -void p32x_schedule_hint(SH2 *sh2, int 
m68k_cycles); +void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles); + +#define p32x_sh2_ready(sh2, cycles) \ + (CYCLES_GT(cycles,sh2->m68krcycles_done) && \ + !(sh2->state&(SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) // 32x/memory.c -struct Pico32xMem *Pico32xMem; -unsigned int PicoRead8_32x(unsigned int a); -unsigned int PicoRead16_32x(unsigned int a); -void PicoWrite8_32x(unsigned int a, unsigned int d); -void PicoWrite16_32x(unsigned int a, unsigned int d); +extern struct Pico32xMem *Pico32xMem; +u32 PicoRead8_32x(u32 a); +u32 PicoRead16_32x(u32 a); +void PicoWrite8_32x(u32 a, u32 d); +void PicoWrite16_32x(u32 a, u32 d); void PicoMemSetup32x(void); void Pico32xSwapDRAM(int b); void Pico32xMemStateLoaded(void); -void p32x_m68k_poll_event(unsigned int flags); -void p32x_sh2_poll_event(SH2 *sh2, unsigned int flags, unsigned int m68k_cycles); +void p32x_update_banks(void); +void p32x_m68k_poll_event(u32 a, u32 flags); +u32 REGPARM(3) p32x_sh2_poll_memory8(u32 a, u32 d, SH2 *sh2); +u32 REGPARM(3) p32x_sh2_poll_memory16(u32 a, u32 d, SH2 *sh2); +u32 REGPARM(3) p32x_sh2_poll_memory32(u32 a, u32 d, SH2 *sh2); +void *p32x_sh2_get_mem_ptr(u32 a, u32 *mask, SH2 *sh2); +int p32x_sh2_mem_is_rom(u32 a, SH2 *sh2); +void p32x_sh2_poll_detect(u32 a, SH2 *sh2, u32 flags, int maxcnt); +void p32x_sh2_poll_event(u32 a, SH2 *sh2, u32 flags, u32 m68k_cycles); +int p32x_sh2_memcpy(u32 dst, u32 src, int count, int size, SH2 *sh2); // 32x/draw.c void PicoDrawSetOutFormat32x(pdso_t which, int use_32x_line_mode); -void FinalizeLine32xRGB555(int sh, int line); +void PicoDrawSetOutBuf32X(void *dest, int increment); +void FinalizeLine32xRGB555(int sh, int line, struct PicoEState *est); void PicoDraw32xLayer(int offs, int lines, int mdbg); void PicoDraw32xLayerMdOnly(int offs, int lines); extern int (*PicoScan32xBegin)(unsigned int num); @@ -849,11 +1092,9 @@ enum { extern int Pico32xDrawMode; // 32x/pwm.c -unsigned int p32x_pwm_read16(unsigned int a, SH2 *sh2, - unsigned int 
m68k_cycles); -void p32x_pwm_write16(unsigned int a, unsigned int d, - SH2 *sh2, unsigned int m68k_cycles); -void p32x_pwm_update(int *buf32, int length, int stereo); +unsigned int p32x_pwm_read16(u32 a, SH2 *sh2, unsigned int m68k_cycles); +void p32x_pwm_write16(u32 a, unsigned int d, SH2 *sh2, unsigned int m68k_cycles); +void p32x_pwm_update(s32 *buf32, int length, int stereo); void p32x_pwm_ctl_changed(void); void p32x_pwm_schedule(unsigned int m68k_now); void p32x_pwm_schedule_sh2(SH2 *sh2); @@ -865,14 +1106,14 @@ void p32x_pwm_state_loaded(void); void p32x_dreq0_trigger(void); void p32x_dreq1_trigger(void); void p32x_timers_recalc(void); -void p32x_timers_do(unsigned int m68k_slice); +void p32x_timer_do(SH2 *sh2, unsigned int m68k_slice); void sh2_peripheral_reset(SH2 *sh2); -unsigned int sh2_peripheral_read8(unsigned int a, SH2 *sh2); -unsigned int sh2_peripheral_read16(unsigned int a, SH2 *sh2); -unsigned int sh2_peripheral_read32(unsigned int a, SH2 *sh2); -void REGPARM(3) sh2_peripheral_write8(unsigned int a, unsigned int d, SH2 *sh2); -void REGPARM(3) sh2_peripheral_write16(unsigned int a, unsigned int d, SH2 *sh2); -void REGPARM(3) sh2_peripheral_write32(unsigned int a, unsigned int d, SH2 *sh2); +u32 REGPARM(2) sh2_peripheral_read8(u32 a, SH2 *sh2); +u32 REGPARM(2) sh2_peripheral_read16(u32 a, SH2 *sh2); +u32 REGPARM(2) sh2_peripheral_read32(u32 a, SH2 *sh2); +void REGPARM(3) sh2_peripheral_write8(u32 a, u32 d, SH2 *sh2); +void REGPARM(3) sh2_peripheral_write16(u32 a, u32 d, SH2 *sh2); +void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2); #else #define Pico32xInit() @@ -899,7 +1140,7 @@ static __inline int isspace_(int c) // emulation event logging #ifndef EL_LOGMASK # ifdef __x86_64__ // HACK -# define EL_LOGMASK (EL_STATUS|EL_IDLE|EL_ANOMALY) +# define EL_LOGMASK (EL_STATUS|EL_ANOMALY) # else # define EL_LOGMASK (EL_STATUS) # endif @@ -994,28 +1235,10 @@ void pevt_dump(void); #define pevt_dump() #endif -// misc -#ifdef _MSC_VER -#define 
cdprintf -#else -#define cdprintf(x...) -#endif - -#if defined(__GNUC__) && defined(__i386__) -#define REGPARM(x) __attribute__((regparm(x))) -#else -#define REGPARM(x) -#endif - -#ifdef __GNUC__ -#define NOINLINE __attribute__((noinline)) -#else -#define NOINLINE -#endif - #ifdef __cplusplus } // End of extern "C" #endif #endif // PICO_INTERNAL_INCLUDED +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/pico_port.h b/pico/pico_port.h new file mode 100644 index 00000000..41a4ce2f --- /dev/null +++ b/pico/pico_port.h @@ -0,0 +1,83 @@ +#ifndef PICO_PORT_INCLUDED +#define PICO_PORT_INCLUDED + +// provide size_t, uintptr_t +#include +#if !(defined(_MSC_VER) && _MSC_VER < 1800) +#include +#endif +#include "pico_types.h" + +#ifdef USE_LIBRETRO_VFS +#include "file_stream_transforms.h" +#endif + +#if defined(__GNUC__) && defined(__i386__) +#define REGPARM(x) __attribute__((regparm(x))) +#else +#define REGPARM(x) +#endif + +#ifdef __GNUC__ +#define NOINLINE __attribute__((noinline)) +#define ALIGNED(n) __attribute__((aligned(n))) +#define unlikely(x) __builtin_expect((x), 0) +#define likely(x) __builtin_expect(!!(x), 1) +#else +#define NOINLINE +#define ALIGNED(n) +#define unlikely(x) (x) +#define likely(x) (x) +#endif + +#ifdef _MSC_VER +#define snprintf _snprintf +#define strcasecmp _stricmp +#define strncasecmp _strnicmp +#define strdup _strdup +#endif + + +// There's no standard way to determine endianess at compile time. Try using +// some well known non-standard macros for detection. 
+#if defined __BYTE_ORDER__ +#define CPU_IS_LE __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#elif defined __BYTE_ORDER +#define CPU_IS_LE __BYTE_ORDER == __LITTLE_ENDIAN +#elif defined __BIG_ENDIAN__ || defined _M_PPC // Windows on PPC was big endian +#define CPU_IS_LE 0 +#elif defined __LITTLE_ENDIAN__ || defined _WIN32 // all other Windows is LE +#define CPU_IS_LE 1 +#else +#warning "can't detect byte order, assume little endian" +#define CPU_IS_LE 1 +#endif +// NB mixed endian integer platforms are not supported. + +#if CPU_IS_LE +// address/offset operations +#define MEM_BE2(a) ((a)^1) // addr/offs of u8 in u16, or u16 in u32 +#define MEM_BE4(a) ((a)^3) // addr/offs of u8 in u32 +#define MEM_LE2(a) (a) +#define MEM_LE4(a) (a) +// swapping +#define CPU_BE2(v) ((u32)((u64)(v)<<16)|((u32)(v)>>16)) +#define CPU_BE4(v) (((u32)(v)>>24)|(((v)>>8)&0x00ff00)| \ + (((v)<<8)&0xff0000)|(u32)((v)<<24)) +#define CPU_LE2(v) (v) // swap of 2*u16 in u32 +#define CPU_LE4(v) (v) // swap of 4*u8 in u32 +#else +// address/offset operations +#define MEM_BE2(a) (a) +#define MEM_BE4(a) (a) +#define MEM_LE2(a) ((a)^1) +#define MEM_LE4(a) ((a)^3) +// swapping +#define CPU_BE2(v) (v) +#define CPU_BE4(v) (v) +#define CPU_LE2(v) ((u32)((u64)(v)<<16)|((u32)(v)>>16)) +#define CPU_LE4(v) (((u32)(v)>>24)|(((v)>>8)&0x00ff00)| \ + (((v)<<8)&0xff0000)|(u32)((v)<<24)) +#endif + +#endif // PICO_PORT_INCLUDED diff --git a/pico/pico_types.h b/pico/pico_types.h new file mode 100644 index 00000000..0c842e92 --- /dev/null +++ b/pico/pico_types.h @@ -0,0 +1,23 @@ +#ifndef PICO_TYPES +#define PICO_TYPES + +#include + +#ifndef __TAMTYPES_H__ +#ifndef UTYPES_DEFINED +typedef uint8_t u8; +typedef int8_t s8; +typedef uint16_t u16; +typedef int16_t s16; +typedef uint32_t u32; +typedef int32_t s32; +typedef uint64_t u64; +typedef int64_t s64; +#endif +#endif + +typedef uintptr_t uptr; /* unsigned pointer-sized int */ + +typedef unsigned int uint; /* printf casts */ +typedef unsigned long ulong; +#endif diff --git 
a/pico/sek.c b/pico/sek.c index 86a351f6..9bab033f 100644 --- a/pico/sek.c +++ b/pico/sek.c @@ -10,11 +10,6 @@ #include "pico_int.h" #include "memory.h" - -unsigned int SekCycleCnt; -unsigned int SekCycleAim; - - /* context */ // Cyclone 68000 #ifdef EMU_C68K @@ -30,15 +25,31 @@ M68K_CONTEXT PicoCpuFM68k; #endif +static int do_ack(int level) +{ + struct PicoVideo *pv = &Pico.video; + + elprintf(EL_INTS, "%cack: @ %06x [%u], p=%02x", + level == 6 ? 'v' : 'h', SekPc, SekCyclesDone(), pv->pending_ints); + // the VDP doesn't look at the 68k level + if (pv->pending_ints & pv->reg[1] & 0x20) { + pv->pending_ints &= ~0x20; + pv->status &= ~SR_F; + if (pv->reg[0] & pv->pending_ints & 0x10) + return pv->hint_irq; + } + else if (pv->pending_ints & pv->reg[0] & 0x10) + pv->pending_ints &= ~0x10; + + return (PicoIn.AHW & PAHW_PICO ? PicoPicoIrqAck(level) : 0); +} + /* callbacks */ #ifdef EMU_C68K // interrupt acknowledgment static int SekIntAck(int level) { - // try to emulate VDP's reaction to 68000 int ack - if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%i]", SekPc, SekCycleCnt); } - else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%i]", SekPc, SekCycleCnt); } - PicoCpuCM68k.irq = 0; + PicoCpuCM68k.irq = do_ack(level); return CYCLONE_INT_ACK_AUTOVECTOR; } @@ -60,6 +71,8 @@ static int SekUnrecognizedOpcode() PicoCpuCM68k.state_flags |= 1; return 1; } + // happened once - may happen again + SekFinishIdleDet(); #ifdef EMU_M68K // debugging cyclone { extern int have_illegal; @@ -74,9 +87,7 @@ static int SekUnrecognizedOpcode() #ifdef EMU_M68K static int SekIntAckM68K(int level) { - if (level == 4) { Pico.video.pending_ints = 0; elprintf(EL_INTS, "hack: @ %06x [%i]", SekPc, SekCycleCnt); } - else if(level == 6) { Pico.video.pending_ints &= ~0x20; elprintf(EL_INTS, "vack: @ %06x [%i]", SekPc, SekCycleCnt); } - CPU_INT_LEVEL = 0; + CPU_INT_LEVEL = do_ack(level) << 8; return M68K_INT_ACK_AUTOVECTOR; } @@ 
-90,15 +101,7 @@ static int SekTasCallback(void) #ifdef EMU_F68K static void SekIntAckF68K(unsigned level) { - if (level == 4) { - Pico.video.pending_ints = 0; - elprintf(EL_INTS, "hack: @ %06x [%i]", SekPc, SekCyclesDone()); - } - else if(level == 6) { - Pico.video.pending_ints &= ~0x20; - elprintf(EL_INTS, "vack: @ %06x [%i]", SekPc, SekCyclesDone()); - } - PicoCpuFM68k.interrupts[0] = 0; + PicoCpuFM68k.interrupts[0] = do_ack(level); } #endif @@ -126,15 +129,10 @@ PICO_INTERNAL void SekInit(void) } #endif #ifdef EMU_F68K - { - void *oldcontext = g_m68kcontext; - g_m68kcontext = &PicoCpuFM68k; - memset(&PicoCpuFM68k, 0, sizeof(PicoCpuFM68k)); - fm68k_init(); - PicoCpuFM68k.iack_handler = SekIntAckF68K; - PicoCpuFM68k.sr = 0x2704; // Z flag - g_m68kcontext = oldcontext; - } + memset(&PicoCpuFM68k, 0, sizeof(PicoCpuFM68k)); + fm68k_init(); + PicoCpuFM68k.iack_handler = SekIntAckF68K; + PicoCpuFM68k.sr = 0x2704; // Z flag #endif } @@ -155,10 +153,7 @@ PICO_INTERNAL int SekReset(void) REG_USP = 0; // ? 
#endif #ifdef EMU_F68K - { - g_m68kcontext = &PicoCpuFM68k; - fm68k_reset(); - } + fm68k_reset(&PicoCpuFM68k); #endif return 0; @@ -166,17 +161,17 @@ PICO_INTERNAL int SekReset(void) void SekStepM68k(void) { - SekCycleAim=SekCycleCnt+1; + Pico.t.m68c_aim = Pico.t.m68c_cnt + 1; #if defined(EMU_CORE_DEBUG) - SekCycleCnt+=CM_compareRun(1, 0); + Pico.t.m68c_cnt += CM_compareRun(1, 0); #elif defined(EMU_C68K) PicoCpuCM68k.cycles=1; CycloneRun(&PicoCpuCM68k); - SekCycleCnt+=1-PicoCpuCM68k.cycles; + Pico.t.m68c_cnt += 1 - PicoCpuCM68k.cycles; #elif defined(EMU_M68K) - SekCycleCnt+=m68k_execute(1); + Pico.t.m68c_cnt += m68k_execute(1); #elif defined(EMU_F68K) - SekCycleCnt+=fm68k_emulate(1, 0); + Pico.t.m68c_cnt += fm68k_emulate(&PicoCpuFM68k, 1, 0); #endif } @@ -194,50 +189,53 @@ PICO_INTERNAL void SekSetRealTAS(int use_real) // XXX: rename PICO_INTERNAL void SekPackCpu(unsigned char *cpu, int is_sub) { - unsigned int pc=0; - #if defined(EMU_C68K) struct Cyclone *context = is_sub ? &PicoCpuCS68k : &PicoCpuCM68k; memcpy(cpu,context->d,0x40); - pc=context->pc-context->membase; - *(unsigned int *)(cpu+0x44)=CycloneGetSr(context); - *(unsigned int *)(cpu+0x48)=context->osp; + *(u32 *)(cpu+0x40)=context->pc-context->membase; + *(u32 *)(cpu+0x44)=CycloneGetSr(context); + *(u32 *)(cpu+0x48)=context->osp; cpu[0x4c] = context->irq; cpu[0x4d] = context->state_flags & 1; #elif defined(EMU_M68K) void *oldcontext = m68ki_cpu_p; m68k_set_context(is_sub ? &PicoCpuMS68k : &PicoCpuMM68k); memcpy(cpu,m68ki_cpu_p->dar,0x40); - pc=m68ki_cpu_p->pc; - *(unsigned int *)(cpu+0x44)=m68k_get_reg(NULL, M68K_REG_SR); - *(unsigned int *)(cpu+0x48)=m68ki_cpu_p->sp[m68ki_cpu_p->s_flag^SFLAG_SET]; + *(u32 *)(cpu+0x40)=m68ki_cpu_p->pc; + *(u32 *)(cpu+0x44)=m68k_get_reg(NULL, M68K_REG_SR); + *(u32 *)(cpu+0x48)=m68ki_cpu_p->sp[m68ki_cpu_p->s_flag^SFLAG_SET]; cpu[0x4c] = CPU_INT_LEVEL>>8; cpu[0x4d] = CPU_STOPPED; m68k_set_context(oldcontext); #elif defined(EMU_F68K) M68K_CONTEXT *context = is_sub ? 
&PicoCpuFS68k : &PicoCpuFM68k; memcpy(cpu,context->dreg,0x40); - pc=context->pc; - *(unsigned int *)(cpu+0x44)=context->sr; - *(unsigned int *)(cpu+0x48)=context->asp; + *(u32 *)(cpu+0x40)=context->pc; + *(u32 *)(cpu+0x44)=context->sr; + *(u32 *)(cpu+0x48)=context->asp; cpu[0x4c] = context->interrupts[0]; cpu[0x4d] = (context->execinfo & FM68K_HALTED) ? 1 : 0; #endif - *(unsigned int *)(cpu+0x40) = pc; - *(unsigned int *)(cpu+0x50) = - is_sub ? SekCycleCntS68k : SekCycleCnt; + if (is_sub) { + *(u32 *)(cpu+0x50) = SekCycleCntS68k; + *(s16 *)(cpu+0x4e) = SekCycleCntS68k - SekCycleAimS68k; + } else { + *(u32 *)(cpu+0x50) = Pico.t.m68c_cnt + Pico.t.z80_buscycles + + ((Pico.t.refresh_delay + (1<<14)/2) >> 14); + *(s16 *)(cpu+0x4e) = Pico.t.m68c_cnt - Pico.t.m68c_aim; + } } PICO_INTERNAL void SekUnpackCpu(const unsigned char *cpu, int is_sub) { #if defined(EMU_C68K) struct Cyclone *context = is_sub ? &PicoCpuCS68k : &PicoCpuCM68k; - CycloneSetSr(context, *(unsigned int *)(cpu+0x44)); - context->osp=*(unsigned int *)(cpu+0x48); + CycloneSetSr(context, *(u32 *)(cpu+0x44)); + context->osp=*(u32 *)(cpu+0x48); memcpy(context->d,cpu,0x40); context->membase = 0; - context->pc = *(unsigned int *)(cpu+0x40); + context->pc = *(u32 *)(cpu+0x40); CycloneUnpack(context, NULL); // rebase PC context->irq = cpu[0x4c]; context->state_flags = 0; @@ -246,33 +244,38 @@ PICO_INTERNAL void SekUnpackCpu(const unsigned char *cpu, int is_sub) #elif defined(EMU_M68K) void *oldcontext = m68ki_cpu_p; m68k_set_context(is_sub ? 
&PicoCpuMS68k : &PicoCpuMM68k); - m68k_set_reg(M68K_REG_SR, *(unsigned int *)(cpu+0x44)); + m68k_set_reg(M68K_REG_SR, *(u32 *)(cpu+0x44)); memcpy(m68ki_cpu_p->dar,cpu,0x40); - m68ki_cpu_p->pc=*(unsigned int *)(cpu+0x40); - m68ki_cpu_p->sp[m68ki_cpu_p->s_flag^SFLAG_SET]=*(unsigned int *)(cpu+0x48); + m68ki_cpu_p->pc=*(u32 *)(cpu+0x40); + m68ki_cpu_p->sp[m68ki_cpu_p->s_flag^SFLAG_SET]=*(u32 *)(cpu+0x48); CPU_INT_LEVEL = cpu[0x4c] << 8; CPU_STOPPED = cpu[0x4d]; m68k_set_context(oldcontext); #elif defined(EMU_F68K) M68K_CONTEXT *context = is_sub ? &PicoCpuFS68k : &PicoCpuFM68k; memcpy(context->dreg,cpu,0x40); - context->pc =*(unsigned int *)(cpu+0x40); - context->sr =*(unsigned int *)(cpu+0x44); - context->asp=*(unsigned int *)(cpu+0x48); + context->pc =*(u32 *)(cpu+0x40); + context->sr =*(u32 *)(cpu+0x44); + context->asp=*(u32 *)(cpu+0x48); context->interrupts[0] = cpu[0x4c]; context->execinfo &= ~FM68K_HALTED; if (cpu[0x4d]&1) context->execinfo |= FM68K_HALTED; #endif - if (is_sub) - SekCycleCntS68k = *(unsigned int *)(cpu+0x50); - else - SekCycleCnt = *(unsigned int *)(cpu+0x50); + if (is_sub) { + SekCycleCntS68k = *(u32 *)(cpu+0x50); + SekCycleAimS68k = SekCycleCntS68k - *(s16 *)(cpu+0x4e); + } else { + Pico.t.m68c_cnt = *(u32 *)(cpu+0x50); + Pico.t.m68c_aim = Pico.t.m68c_cnt - *(s16 *)(cpu+0x4e); + Pico.t.z80_buscycles = 0; + Pico.t.refresh_delay = 0; + } } /* idle loop detection, not to be used in CD mode */ #ifdef EMU_C68K -#include "cpu/cyclone/tools/idle.h" +#include #endif static unsigned short **idledet_ptrs = NULL; @@ -300,13 +303,6 @@ void SekRegisterIdleHit(unsigned int pc) void SekInitIdleDet(void) { - unsigned short **tmp = realloc(idledet_ptrs, 0x200*4); - if (tmp == NULL) { - free(idledet_ptrs); - idledet_ptrs = NULL; - } - else - idledet_ptrs = tmp; idledet_count = idledet_bads = 0; idledet_start_frame = Pico.m.frame_count + 360; #ifdef IDLE_STATS @@ -317,7 +313,7 @@ void SekInitIdleDet(void) CycloneInitIdle(); #endif #ifdef EMU_F68K - 
fm68k_emulate(0, 1); + fm68k_idle_install(); #endif } @@ -329,7 +325,7 @@ int SekIsIdleReady(void) int SekIsIdleCode(unsigned short *dst, int bytes) { // printf("SekIsIdleCode %04x %i\n", *dst, bytes); - switch (bytes) + if (idledet_count >= 0) switch (bytes) { case 2: if ((*dst & 0xf000) != 0x6000) // not another branch @@ -340,7 +336,7 @@ int SekIsIdleCode(unsigned short *dst, int bytes) (*dst & 0xc1ff) == 0x0038 || // move.x ($xxxx.w), dX (*dst & 0xf13f) == 0xb038) // cmp.x ($xxxx.w), dX return 1; - if (PicoAHW & (PAHW_MCD|PAHW_32X)) + if (PicoIn.AHW & (PAHW_MCD|PAHW_32X)) break; // with no addons, there should be no need to wait // for byte change anywhere @@ -367,7 +363,7 @@ int SekIsIdleCode(unsigned short *dst, int bytes) return 1; break; case 12: - if (PicoAHW & (PAHW_MCD|PAHW_32X)) + if (PicoIn.AHW & (PAHW_MCD|PAHW_32X)) break; if ( (*dst & 0xf1f8) == 0x3010 && // move.w (aX), dX (dst[1]&0xf100) == 0x0000 && // arithmetic @@ -398,8 +394,11 @@ int SekRegisterIdlePatch(unsigned int pc, int oldop, int newop, void *ctx) (newop&0x200)?'n':'y', is_main68k?'m':'s', idledet_count); // XXX: probably shouldn't patch RAM too - v = m68k_read16_map[pc >> M68K_MEM_SHIFT]; - if (!(v & 0x80000000)) + if (is_main68k) + v = m68k_read16_map[pc >> M68K_MEM_SHIFT]; + else + v = s68k_read16_map[pc >> M68K_MEM_SHIFT]; + if (~v & ~((uptr)-1LL >> 1)) // MSB clear? 
target = (u16 *)((v << 1) + pc); else { if (++idledet_bads > 128) @@ -407,8 +406,9 @@ int SekRegisterIdlePatch(unsigned int pc, int oldop, int newop, void *ctx) return 1; // don't patch } - if (idledet_count >= 0x200 && (idledet_count & 0x1ff) == 0) { - unsigned short **tmp = realloc(idledet_ptrs, (idledet_count+0x200)*4); + if (!idledet_ptrs || (idledet_count & 0x1ff) == 0) { + unsigned short **tmp; + tmp = realloc(idledet_ptrs, (idledet_count+0x200) * sizeof(tmp[0])); if (tmp == NULL) return 1; idledet_ptrs = tmp; @@ -421,11 +421,13 @@ int SekRegisterIdlePatch(unsigned int pc, int oldop, int newop, void *ctx) void SekFinishIdleDet(void) { + if (idledet_count < 0) + return; #ifdef EMU_C68K CycloneFinishIdle(); #endif #ifdef EMU_F68K - fm68k_emulate(0, 2); + fm68k_idle_remove(); #endif while (idledet_count > 0) { @@ -439,6 +441,11 @@ void SekFinishIdleDet(void) else elprintf(EL_STATUS|EL_IDLE, "idle: don't know how to restore %04x", *op); } + + idledet_count = -1; + if (idledet_ptrs) + free(idledet_ptrs); + idledet_ptrs = NULL; } @@ -460,7 +467,7 @@ void SekTrace(int is_s68k) struct ref_68k *x68k = &ref_68ks[is_s68k]; u32 pc = is_s68k ? SekPcS68k : SekPc; u32 sr = is_s68k ? SekSrS68k : SekSr; - u32 cycles = is_s68k ? SekCycleCntS68k : SekCycleCnt; + u32 cycles = is_s68k ? SekCycleCntS68k : Pico.t.m68c_cnt; u32 r; u8 cmd; #ifdef CPU_CMP_W @@ -549,6 +556,7 @@ breakloop: printf("D%d: %08x A%d: %08x\n", i, x68k->dar[i], i, x68k->dar[i + 8]); printf("PC: %08x, %08x\n", x68k->pc, x68k->pc_prev); + printf("SR: %04x\n", x68k->sr); PDebugDumpMem(); exit(1); diff --git a/pico/sms.c b/pico/sms.c index 8c44d51f..0c2ee634 100644 --- a/pico/sms.c +++ b/pico/sms.c @@ -1,29 +1,34 @@ /* * SMS emulation * (C) notaz, 2009-2010 + * (C) irixxxx, 2021-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
*/ /* * TODO: - * - start in a state as if BIOS ran - * - remaining status flags (OVR/COL) - * - RAM support in mapper - * - region support - * - SN76496 DAC-like usage - * - H counter + * - start in a state as if BIOS ran (partly done for VDP registers, RAM) + * - region support (currently only very limited PAL and Mark-III support) + * - mapper for EEPROM support */ #include "pico_int.h" #include "memory.h" #include "sound/sn76496.h" +#include "sound/emu2413/emu2413.h" + +extern void YM2413_regWrite(unsigned reg); +extern void YM2413_dataWrite(unsigned data); + +extern unsigned sprites_status; // TODO put in some hdr file! static unsigned char vdp_data_read(void) { struct PicoVideo *pv = &Pico.video; unsigned char d; - d = Pico.vramb[pv->addr]; + d = Pico.ms.vdp_buffer; + Pico.ms.vdp_buffer = PicoMem.vramb[MEM_LE2(pv->addr)]; pv->addr = (pv->addr + 1) & 0x3fff; pv->pending = 0; return d; @@ -31,9 +36,16 @@ static unsigned char vdp_data_read(void) static unsigned char vdp_ctl_read(void) { - unsigned char d = Pico.video.pending_ints << 7; - Pico.video.pending = 0; - Pico.video.pending_ints = 0; + struct PicoVideo *pv = &Pico.video; + unsigned char d; + + z80_int_assert(0); + d = pv->status | (pv->pending_ints << 7); + pv->pending = pv->pending_ints = 0; + pv->status = 0; + + if (pv->reg[0] & 0x04) + d |= 0x1f; // unused bits in mode 4 read as 1 elprintf(EL_SR, "VDP sr: %02x", d); return d; @@ -44,28 +56,65 @@ static void vdp_data_write(unsigned char d) struct PicoVideo *pv = &Pico.video; if (pv->type == 3) { - Pico.cram[pv->addr & 0x1f] = d; - Pico.m.dirtyPal = 1; + // cram. 32 on SMS, but 64 on MD. 
Fill 2nd half of cram for prio bit mirror + if (PicoIn.AHW & PAHW_GG) { // GG, same layout as MD + unsigned a = pv->addr & 0x3f; + if (a & 0x1) { // write complete color on high byte write + u16 c = ((d&0x0f) << 8) | Pico.ms.vdp_buffer; + if (PicoMem.cram[a >> 1] != c) Pico.m.dirtyPal = 1; + PicoMem.cram[a >> 1] = PicoMem.cram[(a >> 1)+0x20] = c; + } + } else { // SMS, convert to MD layout (00BbGgRr to 0000BbBbGgGgRrRr) + unsigned a = pv->addr & 0x1f; + u16 c = ((d&0x30)<<6) + ((d&0x0c)<<4) + ((d&0x03)<<2); + if (PicoMem.cram[a] != (c | (c>>2))) Pico.m.dirtyPal = 1; + PicoMem.cram[a] = PicoMem.cram[a+0x20] = c | (c>>2); + } } else { - Pico.vramb[pv->addr] = d; + PicoMem.vramb[MEM_LE2(pv->addr)] = d; } pv->addr = (pv->addr + 1) & 0x3fff; + Pico.ms.vdp_buffer = d; pv->pending = 0; } -static void vdp_ctl_write(unsigned char d) +static NOINLINE void vdp_reg_write(struct PicoVideo *pv, u8 a, u8 d) +{ + int l; + + pv->reg[a] = d; + switch (a) { + case 0: + l = pv->pending_ints & (d >> 3) & 2; + elprintf(EL_INTS, "hint %d", l); + z80_int_assert(l); + break; + case 1: + l = pv->pending_ints & (d >> 5) & 1; + elprintf(EL_INTS, "vint %d", l); + z80_int_assert(l); + break; + } +} + +static void vdp_ctl_write(u8 d) { struct PicoVideo *pv = &Pico.video; if (pv->pending) { - if ((d >> 6) == 2) { - pv->reg[d & 0x0f] = pv->addr; - elprintf(EL_IO, " VDP r%02x=%02x", d & 0x0f, pv->addr & 0xff); - } pv->type = d >> 6; + if (pv->type == 2) { + elprintf(EL_IO, " VDP r%02x=%02x", d & 0x0f, pv->addr & 0xff); + if (pv->reg[d & 0x0f] != (u8)pv->addr) + vdp_reg_write(pv, d & 0x0f, pv->addr); + } pv->addr &= 0x00ff; pv->addr |= (d & 0x3f) << 8; + if (pv->type == 0) { + Pico.ms.vdp_buffer = PicoMem.vramb[MEM_LE2(pv->addr)]; + pv->addr = (pv->addr + 1) & 0x3fff; + } } else { pv->addr &= 0x3f00; pv->addr |= d; @@ -73,47 +122,94 @@ static void vdp_ctl_write(unsigned char d) pv->pending ^= 1; } +static u8 vdp_hcounter(int cycles) +{ + // 171 slots per scanline of 228 clocks, counted 0xf4-0x93, 
0xe9-0xf3 + // this matches h counter tables in SMSVDPTest: + // hc = (cycles+2) * 171 /228 -1 + 0xf4; + int hc = (((cycles+2) * ((171<<8)/228))>>8)-1 + 0xf4; // Q8 to avoid dividing + if (hc > 0x193) hc += 0xe9-0x93-1; + return hc; +} + static unsigned char z80_sms_in(unsigned short a) { - unsigned char d = 0; + unsigned char d = 0xff; + a &= 0xff; elprintf(EL_IO, "z80 port %04x read", a); - a &= 0xc1; - switch (a) - { - case 0x00: - case 0x01: - d = 0xff; - break; - - case 0x40: /* V counter */ - d = Pico.video.v_counter; - elprintf(EL_HVCNT, "V counter read: %02x", d); - break; - - case 0x41: /* H counter */ - d = Pico.m.rotate++; - elprintf(EL_HVCNT, "H counter read: %02x", d); - break; - - case 0x80: - d = vdp_data_read(); - break; - - case 0x81: - d = vdp_ctl_read(); - break; - - case 0xc0: /* I/O port A and B */ - d = ~((PicoPad[0] & 0x3f) | (PicoPad[1] << 6)); - break; - - case 0xc1: /* I/O port B and miscellaneous */ - d = (Pico.ms.io_ctl & 0x80) | ((Pico.ms.io_ctl << 1) & 0x40) | 0x30; - d |= ~(PicoPad[1] >> 2) & 0x0f; - break; + if(a >= 0xf0){ + if (Pico.m.hardware & PMS_HW_FM) { + switch(a) + { + case 0xf0: + // FM reg port + break; + case 0xf1: + // FM data port + break; + case 0xf2: + // bit 0 = 1 active FM Pac + d = 0xf8 | Pico.ms.fm_ctl; + break; + } + } } + else{ + switch (a & 0xc1) + { + case 0x00: + case 0x01: + if ((PicoIn.AHW & PAHW_GG) && a < 0x8) { // GG I/O area + switch (a) { + case 0: d = 0xff & ~(PicoIn.pad[0] & 0x80); break; + case 1: d = Pico.ms.io_gg[1] | (Pico.ms.io_gg[2] & 0x7f); break; + case 5: d = Pico.ms.io_gg[5] & 0xf8; break; + default: d = Pico.ms.io_gg[a]; break; + } + } + break; + case 0x40: /* V counter */ + d = Pico.video.v_counter; + elprintf(EL_HVCNT, "V counter read: %02x", d); + break; + + case 0x41: /* H counter */ + d = Pico.ms.vdp_hlatch; + elprintf(EL_HVCNT, "H counter read: %02x", d); + break; + + case 0x80: + d = vdp_data_read(); + break; + + case 0x81: + d = vdp_ctl_read(); + break; + + case 0xc0: /* I/O port A 
and B */ + if (! (PicoIn.AHW & PAHW_SC) || (Pico.ms.io_sg & 7) == 7) { + d = ~((PicoIn.pad[0] & 0x3f) | (PicoIn.pad[1] << 6)); + if (!(Pico.ms.io_ctl & 0x01)) // TR as output + d = (d & ~0x20) | ((Pico.ms.io_ctl << 1) & 0x20); + } else + ; // read kbd 8 bits + break; + + case 0xc1: /* I/O port B and miscellaneous */ + if (! (PicoIn.AHW & PAHW_SC) || (Pico.ms.io_sg & 7) == 7) { + d = (Pico.ms.io_ctl & 0x80) | ((Pico.ms.io_ctl << 1) & 0x40) | 0x30; + d |= ~(PicoIn.pad[1] >> 2) & 0x0f; + if (!(Pico.ms.io_ctl & 0x04)) // TR as output + d = (d & ~0x08) | ((Pico.ms.io_ctl >> 3) & 0x08); + if (Pico.ms.io_ctl & 0x08) d |= 0x80; // TH as input is unconnected + if (Pico.ms.io_ctl & 0x02) d |= 0x40; + } else + ; // read kbd 4 bits + break; + } + } elprintf(EL_IO, "ret = %02x", d); return d; } @@ -121,84 +217,518 @@ static unsigned char z80_sms_in(unsigned short a) static void z80_sms_out(unsigned short a, unsigned char d) { elprintf(EL_IO, "z80 port %04x write %02x", a, d); - a &= 0xc1; - switch (a) - { - case 0x01: - Pico.ms.io_ctl = d; - break; - case 0x40: - case 0x41: - if (PicoOpt & POPT_EN_PSG) + a &= 0xff; + if (a >= 0xf0){ + if (Pico.m.hardware & PMS_HW_FM) { + switch(a) + { + case 0xf0: + // FM reg port + Pico.m.hardware |= PMS_HW_FMUSED; + YM2413_regWrite(d); + break; + case 0xf1: + // FM data port + YM2413_dataWrite(d); + break; + case 0xf2: + // bit 0 = 1 active FM Pac + Pico.ms.fm_ctl = d & 0x1; + break; + } + } + } + else { + switch (a & 0xc1) + { + case 0x00: + if ((PicoIn.AHW & PAHW_GG) && a < 0x8) // GG I/O area + Pico.ms.io_gg[a] = d; + if ((PicoIn.AHW & PAHW_GG) && a == 0x6) + SN76496Config(d); + break; + case 0x01: + if ((PicoIn.AHW & PAHW_GG) && a < 0x8) { // GG I/O area + Pico.ms.io_gg[a] = d; + } else { + // pad. 
latch hcounter if one of the TH lines is switched to 1 + if ((Pico.ms.io_ctl ^ d) & d & 0xa0) + Pico.ms.vdp_hlatch = vdp_hcounter(z80_cyclesDone() - Pico.t.z80c_line_start); + Pico.ms.io_ctl = d; + } + break; + + case 0x40: + case 0x41: + PsndDoPSG(z80_cyclesDone()); SN76496Write(d); - break; + break; - case 0x80: - vdp_data_write(d); - break; + case 0x80: + vdp_data_write(d); + break; - case 0x81: - vdp_ctl_write(d); - break; + case 0x81: + vdp_ctl_write(d); + break; + + case 0xc0: + if ((PicoIn.AHW & PAHW_SC) && (a & 0x2)) + Pico.ms.io_sg = d; // 0xc2 = kbd/pad select + } } } +static void z80_exec(int aim) +{ + Pico.t.z80c_aim = aim; + Pico.t.z80c_cnt += z80_run(Pico.t.z80c_aim - Pico.t.z80c_cnt); +} + + +// ROM/SRAM bank mapping, see https://www.smspower.org/Development/Mappers + static int bank_mask; -static void write_bank(unsigned short a, unsigned char d) +static void xwrite(unsigned int a, unsigned char d); + + +// Sega mapper. Maps 3 banks 16KB each, with SRAM support +static void write_sram_sega(unsigned short a, unsigned char d) { - elprintf(EL_Z80BNK, "bank %04x %02x @ %04x", a, d, z80_pc()); + // SRAM is mapped in 2 16KB banks, selected by bit 2 in control reg + a &= 0x3fff; + a += ((Pico.ms.carthw[0x0c] & 0x04) >> 2) * 0x4000; + + Pico.sv.changed |= (Pico.sv.data[a] != d); + Pico.sv.data[a] = d; +} + +static void write_bank_sega(unsigned short a, unsigned char d) +{ + if (a < 0xfff8) return; + // avoid mapper detection for RAM fill with 0 + if (Pico.ms.mapper != PMS_MAP_SEGA && (Pico.ms.mapper || d == 0)) return; + + elprintf(EL_Z80BNK, "bank sega %04x %02x @ %04x", a, d, z80_pc()); + Pico.ms.mapper = PMS_MAP_SEGA; + if (d == Pico.ms.carthw[a & 0x0f]) return; + Pico.ms.carthw[a & 0x0f] = d; + switch (a & 0x0f) { - case 0x0c: - elprintf(EL_STATUS|EL_ANOMALY, "%02x written to control reg!", d); - break; case 0x0d: - if (d != 0) - elprintf(EL_STATUS|EL_ANOMALY, "bank0 changed to %d!", d); + d &= bank_mask; + z80_map_set(z80_read_map, 0x0400, 0x3fff, 
Pico.rom+0x400 + (d << 14), 0); break; case 0x0e: d &= bank_mask; z80_map_set(z80_read_map, 0x4000, 0x7fff, Pico.rom + (d << 14), 0); -#ifdef _USE_CZ80 - Cz80_Set_Fetch(&CZ80, 0x4000, 0x7fff, (FPTR)Pico.rom + (d << 14)); -#endif break; + + case 0x0c: + if (d & ~0x8c) + elprintf(EL_STATUS|EL_ANOMALY, "%02x written to control reg!", d); + /*FALLTHROUGH*/ case 0x0f: - d &= bank_mask; - z80_map_set(z80_read_map, 0x8000, 0xbfff, Pico.rom + (d << 14), 0); -#ifdef _USE_CZ80 - Cz80_Set_Fetch(&CZ80, 0x8000, 0xbfff, (FPTR)Pico.rom + (d << 14)); -#endif + if (Pico.ms.carthw[0xc] & 0x08) { + d = (Pico.ms.carthw[0xc] & 0x04) >> 2; + z80_map_set(z80_read_map, 0x8000, 0xbfff, Pico.sv.data + d*0x4000, 0); + z80_map_set(z80_write_map, 0x8000, 0xbfff, write_sram_sega, 1); + } else { + d = Pico.ms.carthw[0xf] & bank_mask; + z80_map_set(z80_read_map, 0x8000, 0xbfff, Pico.rom + (d << 14), 0); + z80_map_set(z80_write_map, 0x8000, 0xbfff, xwrite, 1); + } break; } - Pico.ms.carthw[a & 0x0f] = d; } +// Codemasters mapper. Similar to Sega, but different addresses +static void write_bank_codem(unsigned short a, unsigned char d) +{ + if (a >= 0xc000 || (a & 0x3fff)) return; // address is 0x0000, 0x4000, 0x8000? + // don't detect linear mapping to avoid confusing with MSX + if (Pico.ms.mapper != PMS_MAP_CODEM && (Pico.ms.mapper || (a>>14) == d)) return; + elprintf(EL_Z80BNK, "bank codem %04x %02x @ %04x", a, d, z80_pc()); + Pico.ms.mapper = PMS_MAP_CODEM; + if (Pico.ms.carthw[a>>14] == d) return; + Pico.ms.carthw[a>>14] = d; + + d &= bank_mask; + z80_map_set(z80_read_map, a, a+0x3fff, Pico.rom + (d << 14), 0); + if (Pico.ms.carthw[1] & 0x80) { + z80_map_set(z80_read_map, 0xa000, 0xbfff, PicoMem.vram+0x4000, 0); + z80_map_set(z80_write_map, 0xa000, 0xbfff, PicoMem.vram+0x4000, 0); + } else { + d = Pico.ms.carthw[2] & bank_mask; + z80_map_set(z80_read_map, 0xa000, 0xbfff, Pico.rom + (d << 14)+0x2000, 0); + z80_map_set(z80_write_map, 0xa000, 0xbfff, xwrite, 1); + } +} + +// MSX mapper. 
4 selectable 8KB banks at the top +static void write_bank_msx(unsigned short a, unsigned char d) +{ + if (a > 0x0003) return; + // don't detect linear mapping to avoid confusing with Codemasters + if (Pico.ms.mapper != PMS_MAP_MSX && (Pico.ms.mapper || (a|d) == 0 || d >= 0x80)) return; + elprintf(EL_Z80BNK, "bank msx %04x %02x @ %04x", a, d, z80_pc()); + Pico.ms.mapper = PMS_MAP_MSX; + Pico.ms.carthw[a] = d; + + a = (a^2)*0x2000 + 0x4000; + d &= 2*bank_mask + 1; + z80_map_set(z80_read_map, a, a+0x1fff, Pico.rom + (d << 13), 0); +} + +// Korea mapping, 1 selectable 16KB bank at the top +static void write_bank_korea(unsigned short a, unsigned char d) +{ + if (a != 0xa000) return; + if (Pico.ms.mapper != PMS_MAP_KOREA && (Pico.ms.mapper)) return; + elprintf(EL_Z80BNK, "bank korea %04x %02x @ %04x", a, d, z80_pc()); + Pico.ms.mapper = PMS_MAP_KOREA; + Pico.ms.carthw[0xf] = d; + + d &= bank_mask; + z80_map_set(z80_read_map, 0x8000, 0xbfff, Pico.rom + (d << 14), 0); +} + +// Korean n-in-1 mapping. 1 selectable 32KB bank at the bottom +static void write_bank_n32k(unsigned short a, unsigned char d) +{ + if (a != 0xffff) return; + // code must be in RAM since all visible ROM space is swapped + if (Pico.ms.mapper != PMS_MAP_N32K && (Pico.ms.mapper || z80_pc() < 0xc000)) return; + elprintf(EL_Z80BNK, "bank 32k %04x %02x @ %04x", a, d, z80_pc()); + Pico.ms.mapper = PMS_MAP_N32K; + Pico.ms.carthw[0xf] = d; + + d &= bank_mask >> 1; + z80_map_set(z80_read_map, 0, 0x7fff, Pico.rom + (d << 15), 0); +} + +// Korean 4-in-1. 
2 selectable 16KB banks, top bank is shifted by bottom one +static void write_bank_n16k(unsigned short a, unsigned char d) +{ + if (a != 0x3ffe && a != 0x7fff && a != 0xbfff) return; + // code must be in RAM since all visible ROM space is swapped + if (Pico.ms.mapper != PMS_MAP_N16K && (Pico.ms.mapper || z80_pc() < 0xc000)) return; + elprintf(EL_Z80BNK, "bank 16k %04x %02x @ %04x", a, d, z80_pc()); + Pico.ms.mapper = PMS_MAP_N16K; + Pico.ms.carthw[a>>14] = d; + + d &= bank_mask; + a = a & 0xc000; + // the top bank shifts with the bottom bank. + if (a == 0x8000) d += Pico.ms.carthw[0] & 0x30; + z80_map_set(z80_read_map, a, a+0x3fff, Pico.rom + (d << 14), 0); +} + +// MSX-Nemesis mapper. 4 selectable 8KB banks at the top +static void write_bank_msxn(unsigned short a, unsigned char d) +{ + if (a > 0x0003) return; + // never autodetected, selectable only via config + if (Pico.ms.mapper != PMS_MAP_NEMESIS) return; + elprintf(EL_Z80BNK, "bank nems %04x %02x @ %04x", a, d, z80_pc()); + Pico.ms.carthw[a] = d; + + a = (a^2)*0x2000 + 0x4000; + d &= 2*bank_mask + 1; + z80_map_set(z80_read_map, a, a+0x1fff, Pico.rom + (d << 13), 0); +} + +// Korean Janggun mapper. 
4 selectable 8KB banks at the top, hardware byte flip +static unsigned char read_flipped_jang(unsigned a) +{ + static unsigned char flipper[16] = // reversed nibble bit order + { 0x0,0x8,0x4,0xc,0x2,0xa,0x6,0xe,0x1,0x9,0x5,0xd,0x3,0xb,0x7,0xf }; + unsigned char c; + + // return value at address a in reversed bit order + c = Pico.rom[(Pico.ms.carthw[a>>13] << 13) + (a & 0x1fff)]; + return (flipper[c&0xf]<<4) | flipper[c>>4]; +} + +static void write_bank_jang(unsigned short a, unsigned char d) +{ + // address is 0xfffe, 0xffff, 0x4000, 0x6000, 0x8000, 0xa000 + if ((a|1) != 0xffff && (a < 0x4000 || a > 0xa000 || (a & 0x1fff))) return; + // never autodetected, selectable only via config + if (Pico.ms.mapper != PMS_MAP_JANGGUN) return; + elprintf(EL_Z80BNK, "bank jang %04x %02x @ %04x", a, d, z80_pc()); + + if ((a|1) == 0xffff) { + int x = a & 1, f = d & 0x40; + Pico.ms.carthw[x] = d; + d &= bank_mask; + Pico.ms.carthw[2*x + 2] = 2*d, Pico.ms.carthw[2*x + 3] = 2*d+1; + a = (x+1) * 0x4000; + if (!f) + z80_map_set(z80_read_map, a, a+0x3fff, Pico.rom + (d << 14), 0); + else + z80_map_set(z80_read_map, a, a+0x3fff, read_flipped_jang, 1); + } else { + d &= 2*bank_mask + 1; + Pico.ms.carthw[a>>13] = d; + if (!(Pico.ms.carthw[(a>>15)&1] & 0x40)) + z80_map_set(z80_read_map, a, a+0x1fff, Pico.rom + (d << 13), 0); + else + z80_map_set(z80_read_map, a, a+0x1fff, read_flipped_jang, 1); + } +} + +// Korean 188-in-1. 
4 8KB banks from 0x4000, selected by xor'd bank index +static void write_bank_xor(unsigned short a, unsigned char d) +{ + // 4x8KB bank select @0x2000 + if ((a&0xff00) != 0x2000) return; + if (Pico.ms.mapper != PMS_MAP_XOR && Pico.ms.mapper) return; + + elprintf(EL_Z80BNK, "bank xor %04x %02x @ %04x", a, d, z80_pc()); + Pico.ms.mapper = PMS_MAP_XOR; + + Pico.ms.carthw[0] = d; + z80_map_set(z80_read_map, 0x4000, 0x5fff, Pico.rom + ((d^0x1f) << 13), 0); + z80_map_set(z80_read_map, 0x6000, 0x7fff, Pico.rom + ((d^0x1e) << 13), 0); + z80_map_set(z80_read_map, 0x8000, 0x9fff, Pico.rom + ((d^0x1d) << 13), 0); + z80_map_set(z80_read_map, 0xa000, 0xbfff, Pico.rom + ((d^0x1c) << 13), 0); +} + +// SG-1000 8KB RAM Adaptor mapper. 8KB RAM at address 0x2000 +static void write_bank_x8k(unsigned short a, unsigned char d) +{ + // 8KB address range @ 0x2000 (adaptor) or @ 0x8000 (cartridge) + if (((a&0xe000) != 0x2000 && (a&0xe000) != 0x8000) || (a & 0x0f) == 5) return; + if (Pico.ms.mapper != PMS_MAP_8KBRAM && Pico.ms.mapper) return; + + elprintf(EL_Z80BNK, "bank x8k %04x %02x @ %04x", a, d, z80_pc()); + ((unsigned char *)(PicoMem.vram+0x4000))[a&0x1fff] = d; + Pico.ms.mapper = PMS_MAP_8KBRAM; + + a &= 0xe000; + Pico.ms.carthw[0] = a >> 12; + z80_map_set(z80_read_map, a, a+0x1fff, PicoMem.vram+0x4000, 0); + z80_map_set(z80_write_map, a, a+0x1fff, PicoMem.vram+0x4000, 0); +} + +// SC-3000 32KB RAM mapper for BASIC level IIIB. 
32KB RAM at address 0x8000 +static void write_bank_x32k(unsigned short a, unsigned char d) +{ + // 32KB address range @ 0x8000 + if ((a&0xc000) != 0x8000) return; + if (Pico.ms.mapper != PMS_MAP_32KBRAM && + (Pico.ms.mapper || Pico.romsize > 0x8000)) return; + + elprintf(EL_Z80BNK, "bank x32k %04x %02x @ %04x", a, d, z80_pc()); + ((unsigned char *)(PicoMem.vram+0x4000))[a&0x7fff] = d; + Pico.ms.mapper = PMS_MAP_32KBRAM; + + a &= 0xc000; + Pico.ms.carthw[0] = a >> 12; + // NB this deactivates internal RAM and all mapper detection + z80_map_set(z80_read_map, a, a+0x7fff, PicoMem.vram+0x4000, 0); + z80_map_set(z80_write_map, a, a+0x7fff, PicoMem.vram+0x4000, 0); +} + +char *mappers[] = { + [PMS_MAP_SEGA] = "Sega", + [PMS_MAP_CODEM] = "Codemasters", + [PMS_MAP_KOREA] = "Korea", + [PMS_MAP_MSX] = "Korea MSX", + [PMS_MAP_N32K] = "Korea X-in-1", + [PMS_MAP_N16K] = "Korea 4-Pak", + [PMS_MAP_JANGGUN] = "Korea Janggun", + [PMS_MAP_NEMESIS] = "Korea Nemesis", + [PMS_MAP_8KBRAM] = "Taiwan 8K RAM", + [PMS_MAP_XOR] = "Korea XOR", + [PMS_MAP_32KBRAM] = "Sega 32K RAM", +}; + +// TODO auto-selecting is not really reliable. +// Before adding more mappers this should be revised. static void xwrite(unsigned int a, unsigned char d) { + int sz = (/*PicoIn.AHW & (PAHW_SG|PAHW_SC) ? 
2 :*/ 8) * 1024; + elprintf(EL_IO, "z80 write [%04x] %02x", a, d); if (a >= 0xc000) - Pico.zram[a & 0x1fff] = d; - if (a >= 0xfff8) - write_bank(a, d); + PicoMem.zram[a & (sz-1)] = d; + + switch (Pico.ms.mapper) { // via config, or auto detected + case PMS_MAP_SEGA: write_bank_sega(a, d); break; + case PMS_MAP_CODEM: write_bank_codem(a, d); break; + case PMS_MAP_MSX: write_bank_msx(a, d); break; + case PMS_MAP_KOREA: write_bank_korea(a, d); break; + case PMS_MAP_N32K: write_bank_n32k(a, d); break; + case PMS_MAP_N16K: write_bank_n16k(a, d); break; + case PMS_MAP_JANGGUN: write_bank_jang(a, d); break; + case PMS_MAP_NEMESIS: write_bank_msxn(a, d); break; + case PMS_MAP_8KBRAM: write_bank_x8k(a, d); break; + case PMS_MAP_32KBRAM: write_bank_x32k(a, d); break; + case PMS_MAP_XOR: write_bank_xor(a, d); break; + + case PMS_MAP_AUTO: + // disable autodetection after some time + if ((a >= 0xc000 && a < 0xfff8) || Pico.ms.mapcnt > 50) break; + // NB the sequence of mappers is crucial for the auto detection + if (PicoIn.AHW & PAHW_SC) { + write_bank_x32k(a,d); + } else if (PicoIn.AHW & PAHW_SG) { + write_bank_x8k(a, d); + } else { + write_bank_n32k(a, d); + write_bank_sega(a, d); + write_bank_msx(a, d); + write_bank_codem(a, d); + write_bank_korea(a, d); + write_bank_n16k(a, d); + write_bank_xor(a, d); + } + + Pico.ms.mapcnt ++; + if (Pico.ms.mapper) + elprintf(EL_STATUS, "autodetected %s mapper",mappers[Pico.ms.mapper]); + break; + } } +// Try to detect some tricky cases by their TMR header +// NB Codemasters, some Betas, most unlicensed games have no or invalid TMRs. +// if the cksum header is valid mark this by 0x.fff.... 
and use that instead + +// TMR product codes and hardware type for known 50Hz-only games +static u32 region_pal[] = { // cf Meka, meka/meka.nam + 0x40207067 /* Addams Family */, 0x40207020 /* Back.Future 3 */, + 0x40207058 /* Battlemaniacs */, 0x40007105 /* Cal.Games 2 */, + 0x402f7065 /* Dracula */ , 0x40007109 /* Home Alone */, + 0x40009024 /* Pwr.Strike 2 */ , 0x40207047 /* Predator 2 EU */, + 0x40002519 /* Quest.Yak */ , 0x40207064 /* Robocop 3 */, + 0x4f205014 /* Sens.Soccer */ , 0x40002573 /* Sonic Blast */, + 0x40007080 /* S.Harrier EU */ , 0x40007038 /* Taito Chase */, + 0x40009015 /* Sonic 2 EU */ , /* NBA Jam: no valid id/cksum */ + 0x4fff8872 /* Excell.Dizzy */ , 0x4ffffac4 /* Fantast.Dizzy */, + 0x4fff4a89 /* Csm.Spacehead */, 0x4fffe352 /* Micr.Machines */, + 0x4fffa203 /* Bad Apple */ +}; + +// TMR product codes and hardware type for known non-FM games +static u32 no_fmsound[] = { // cf Meka, meka/meka.pat + 0x40002070 /* Walter Payton */, 0x40017020 /* American Pro */, + 0x4fffe890 /* Wanted */ +}; + +// TMR product codes and hardware type for known GG carts running in SMS mode +// NB GG carts having the system type set to 4 (eg. 
HTH games) run as SMS anyway +static u32 gg_smsmode[] = { // cf https://www.smspower.org/Tags/SMS-GG + 0x60002401 /* Castl.Ilusion */, 0x6f101018 /* Taito Chase */, + 0x70709018 /* Olympic Gold */ , 0x70709038 /* Outrun EU */, + 0x60801068 /* Predator 2 */ , 0x70408098 /* Prince.Persia */, + 0x50101037 /* Rastan Saga */ , 0x7f086018 /* RC Grandprix */, + 0x60002415 /* Super Kickoff */, 0x60801108 /* WWF.Steelcage */, + /* Excell.Dizzy, Fantast.Dizzy, Super Tetris: no valid id/cksum in TMR */ + 0x4f813028 /* Tesserae */ +}; + void PicoResetMS(void) { + unsigned tmr; + u32 id, hw, ck, i; + + // set preselected hw/mapper from config + if (PicoIn.hwSelect) { + PicoIn.AHW &= ~(PAHW_GG|PAHW_SG|PAHW_SC); + switch (PicoIn.hwSelect) { + case PHWS_GG: PicoIn.AHW |= PAHW_GG; break; + case PHWS_SG: PicoIn.AHW |= PAHW_SG; break; + case PHWS_SC: PicoIn.AHW |= PAHW_SC; break; + } + } + Pico.ms.mapcnt = Pico.ms.mapper = 0; + if (PicoIn.mapper) + Pico.ms.mapper = PicoIn.mapper; + Pico.m.hardware |= PMS_HW_JAP; // default region Japan if no TMR header + if (PicoIn.regionOverride > 2) + Pico.m.hardware &= ~PMS_HW_JAP; + Pico.m.hardware |= PMS_HW_FM; + if (!(PicoIn.opt & POPT_EN_YM2413)) + Pico.m.hardware &= ~PMS_HW_FM; + + // check if the ROM header contains more system information + for (tmr = 0x2000; tmr < 0xbfff && tmr <= Pico.romsize; tmr *= 2) { + if (!memcmp(Pico.rom + tmr-16, "TMR SEGA", 8)) { + hw = Pico.rom[tmr-1] >> 4; + id = CPU_LE4(*(u32 *)&Pico.rom[tmr-4]); + ck = *(u16 *)&Pico.rom[tmr-6] | (id&0xf0000000) | 0xfff0000; + + if (!PicoIn.hwSelect && !PicoIn.AHW && hw && ((id+1)&0xfffe) != 0) { + if (hw >= 0x5 && hw < 0x8) + PicoIn.AHW |= PAHW_GG; // GG cartridge detected + } + if (!PicoIn.regionOverride) { + Pico.m.hardware &= ~PMS_HW_JAP; + if (hw == 0x5 || hw == 0x3) + Pico.m.hardware |= PMS_HW_JAP; // region Japan + } + for (i = 0; i < sizeof(region_pal)/sizeof(*region_pal); i++) + if ((id == region_pal[i] || ck == region_pal[i]) && !PicoIn.regionOverride) + { + 
Pico.m.pal = 1; // requires 50Hz timing + break; + } + for (i = 0; i < sizeof(gg_smsmode)/sizeof(*gg_smsmode); i++) + if ((id == gg_smsmode[i] || ck == gg_smsmode[i]) && !PicoIn.hwSelect) { + PicoIn.AHW &= ~PAHW_GG; // requires SMS mode + if (hw < 0x5) PicoIn.AHW |= PAHW_GG; + break; + } + for (i = 0; i < sizeof(no_fmsound)/sizeof(*no_fmsound); i++) + if ((id == no_fmsound[i] || ck == no_fmsound[i])) { + Pico.m.hardware &= ~PMS_HW_FM; // incompatible with FM + break; + } + break; + } + } + z80_reset(); PsndReset(); // pal must be known here + + Pico.ms.io_ctl = (PicoIn.AHW & (PAHW_SG|PAHW_SC)) ? 0xf5 : 0xff; + Pico.ms.fm_ctl = 0xff; + + // reset memory mapping + PicoMemSetupMS(); + + // BIOS, VDP intialisation + Pico.video.reg[0] = 0x36; + Pico.video.reg[1] = 0xa0; + Pico.video.reg[2] = 0xff; + Pico.video.reg[3] = 0xff; + Pico.video.reg[4] = 0xff; + Pico.video.reg[5] = 0xff; + Pico.video.reg[6] = 0xfb; + Pico.video.reg[7] = 0x00; + Pico.video.reg[8] = 0x00; + Pico.video.reg[9] = 0x00; + Pico.video.reg[10] = 0xff; + Pico.m.dirtyPal = 1; + + // BIOS, clear zram (unitialized on Mark-III, cf src/mame/drivers/sms.cpp) + i = !(PicoIn.AHW & PAHW_GG) && (Pico.m.hardware & PMS_HW_JAP) ? 0xf0 : 0x00; + memset(PicoMem.zram, i, sizeof(PicoMem.zram)); } void PicoPowerMS(void) { int s, tmp; - memset(&Pico.ram,0,(unsigned char *)&Pico.rom - Pico.ram); + memset(&PicoMem,0,sizeof(PicoMem)); memset(&Pico.video,0,sizeof(Pico.video)); memset(&Pico.m,0,sizeof(Pico.m)); - Pico.m.pal = 0; // calculate a mask for bank writes. // ROM loader has aligned the size for us, so this is safe. 
@@ -210,39 +740,117 @@ void PicoPowerMS(void) tmp = 1 << s; bank_mask = (tmp - 1) >> 14; - Pico.ms.carthw[0x0e] = 1; - Pico.ms.carthw[0x0f] = 2; - + PicoMem.ioports[0] = 0xc3; // hack to jump @0 at end of RAM to wrap around + Pico.ms.mapper = PicoIn.mapper; PicoReset(); } void PicoMemSetupMS(void) { - z80_map_set(z80_read_map, 0x0000, 0xbfff, Pico.rom, 0); - z80_map_set(z80_read_map, 0xc000, 0xdfff, Pico.zram, 0); - z80_map_set(z80_read_map, 0xe000, 0xffff, Pico.zram, 0); + u8 mapper = Pico.ms.mapper; + int sz = (/*PicoIn.AHW & (PAHW_SG|PAHW_SC) ? 2 :*/ 8) * 1024; + u32 a; - z80_map_set(z80_write_map, 0x0000, 0xbfff, xwrite, 1); - z80_map_set(z80_write_map, 0xc000, 0xdfff, Pico.zram, 0); - z80_map_set(z80_write_map, 0xe000, 0xffff, xwrite, 1); - + // RAM and its mirrors + for (a = 0xc000; a < 0x10000; a += sz) { + z80_map_set(z80_read_map, a, a + sz-1, PicoMem.zram, 0); + z80_map_set(z80_write_map, a, a + sz-1, PicoMem.zram, 0); + } + a = 0xffff - (1< 0x1e000) + z80_map_set(z80_read_map, 0x0000, 0x1fff, Pico.rom + 0x1e000, 0); #ifdef _USE_DRZ80 drZ80.z80_in = z80_sms_in; drZ80.z80_out = z80_sms_out; #endif #ifdef _USE_CZ80 - Cz80_Set_Fetch(&CZ80, 0x0000, 0xbfff, (FPTR)Pico.rom); - Cz80_Set_Fetch(&CZ80, 0xc000, 0xdfff, (FPTR)Pico.zram); - Cz80_Set_Fetch(&CZ80, 0xe000, 0xffff, (FPTR)Pico.zram); Cz80_Set_INPort(&CZ80, z80_sms_in); Cz80_Set_OUTPort(&CZ80, z80_sms_out); #endif + + // memory mapper setup, linear mapping of 1st 48KB + memset(Pico.ms.carthw, 0, sizeof(Pico.ms.carthw)); + if (mapper == PMS_MAP_MSX || mapper == PMS_MAP_NEMESIS) { + xwrite(0x0000, 4); + xwrite(0x0001, 5); + xwrite(0x0002, 2); + xwrite(0x0003, 3); + } else if (mapper == PMS_MAP_KOREA) { + xwrite(0xa000, 2); + } else if (mapper == PMS_MAP_N32K) { + xwrite(0xffff, 0); + } else if (mapper == PMS_MAP_N16K) { + xwrite(0x3ffe, 0); + xwrite(0x7fff, 1); + xwrite(0xbfff, 2); + } else if (mapper == PMS_MAP_JANGGUN) { + xwrite(0xfffe, 1); + xwrite(0xffff, 2); + } else if (mapper == PMS_MAP_XOR) { + 
xwrite(0x2000, 0); + } else if (mapper == PMS_MAP_CODEM) { + xwrite(0x0000, 0); + xwrite(0x4000, 1); + xwrite(0x8000, 2); + } else if (mapper == PMS_MAP_SEGA) { + xwrite(0xfffc, 0); + xwrite(0xfffd, 0); + xwrite(0xfffe, 1); + xwrite(0xffff, 2); + } else if (mapper == PMS_MAP_AUTO) { + // pre-initialize Sega mapper to linear mapping (else state load may fail) + Pico.ms.carthw[0xe] = 0x1; + Pico.ms.carthw[0xf] = 0x2; + } } void PicoStateLoadedMS(void) { - write_bank(0xfffe, Pico.ms.carthw[0x0e]); - write_bank(0xffff, Pico.ms.carthw[0x0f]); + u8 mapper = Pico.ms.mapper; + u8 zram_dff0[16]; // TODO xwrite also writes to zram :-/ + + memcpy(zram_dff0, PicoMem.zram+0x1ff0, 16); + if (mapper == PMS_MAP_8KBRAM || mapper == PMS_MAP_32KBRAM) { + u16 a = Pico.ms.carthw[0] << 12; + xwrite(a, *(unsigned char *)(PicoMem.vram+0x4000)); + } else if (mapper == PMS_MAP_MSX || mapper == PMS_MAP_NEMESIS) { + xwrite(0x0000, Pico.ms.carthw[0]); + xwrite(0x0001, Pico.ms.carthw[1]); + xwrite(0x0002, Pico.ms.carthw[2]); + xwrite(0x0003, Pico.ms.carthw[3]); + } else if (mapper == PMS_MAP_KOREA) { + xwrite(0xa000, Pico.ms.carthw[0x0f]); + } else if (mapper == PMS_MAP_N32K) { + xwrite(0xffff, Pico.ms.carthw[0x0f]); + } else if (mapper == PMS_MAP_N16K) { + xwrite(0x3ffe, Pico.ms.carthw[0]); + xwrite(0x7fff, Pico.ms.carthw[1]); + xwrite(0xbfff, Pico.ms.carthw[2]); + } else if (mapper == PMS_MAP_JANGGUN) { + xwrite(0x4000, Pico.ms.carthw[2]); + xwrite(0x6000, Pico.ms.carthw[3]); + xwrite(0x8000, Pico.ms.carthw[4]); + xwrite(0xa000, Pico.ms.carthw[5]); + } else if (mapper == PMS_MAP_XOR) { + xwrite(0x2000, Pico.ms.carthw[0]); + } else if (mapper == PMS_MAP_CODEM) { + xwrite(0x0000, Pico.ms.carthw[0]); + xwrite(0x4000, Pico.ms.carthw[1]); + xwrite(0x8000, Pico.ms.carthw[2]); + } else if (mapper == PMS_MAP_SEGA) { + xwrite(0xfffc, Pico.ms.carthw[0x0c]); + xwrite(0xfffd, Pico.ms.carthw[0x0d]); + xwrite(0xfffe, Pico.ms.carthw[0x0e]); + xwrite(0xffff, Pico.ms.carthw[0x0f]); + } + 
memcpy(PicoMem.zram+0x1ff0, zram_dff0, 16); } void PicoFrameMS(void) @@ -250,67 +858,107 @@ void PicoFrameMS(void) struct PicoVideo *pv = &Pico.video; int is_pal = Pico.m.pal; int lines = is_pal ? 313 : 262; - int cycles_line = is_pal ? 58020 : 58293; /* (226.6 : 227.7) * 256 */ - int cycles_done = 0, cycles_aim = 0; - int skip = PicoSkipFrame; + int cycles_line = 228; + int skip = PicoIn.skipFrame; int lines_vis = 192; int hint; // Hint counter int nmi; int y; - nmi = (PicoPad[0] >> 7) & 1; - if (!Pico.ms.nmi_state && nmi) + PsndStartFrame(); + + // for SMS the pause button generates an NMI, for GG this is not the case + nmi = (PicoIn.pad[0] >> 7) & 1; + if (!(PicoIn.AHW & PAHW_GG) && !Pico.ms.nmi_state && nmi) z80_nmi(); Pico.ms.nmi_state = nmi; - PicoFrameStartMode4(); + if ((pv->reg[0] & 6) == 6 && (pv->reg[1] & 0x18)) + lines_vis = (pv->reg[1] & 0x08) ? 240 : 224; + PicoFrameStartSMS(); hint = pv->reg[0x0a]; + // SMS: xscroll:f3 sprovr,vint, vcount:fc, hint:fd + // GG: xscroll:f5 sprovr,vint:fd vcount:fe, hint:ff for (y = 0; y < lines; y++) { - pv->v_counter = Pico.m.scanline = y; - if (y > 218) - pv->v_counter = y - 6; + Pico.t.z80c_line_start = Pico.t.z80c_aim; + // advance the line counter. It is set back at some point in the VBLANK so + // that the line count in the active area (-32..lines+1) is contiguous. + pv->v_counter = Pico.m.scanline = (u8)y; + switch (is_pal ? -lines_vis : lines_vis) { + case 192: if (y > 218) pv->v_counter = y - (lines-256); break; + case 224: if (y > 234) pv->v_counter = y - (lines-256); break; +/* case 240: if (y > 242) pv->v_counter = y - (lines-256); break; ? 
*/ + case -192: if (y > 242) pv->v_counter = y - (lines-256); break; + case -224: if (y > 258) pv->v_counter = y - (lines-256); break; + case -240: if (y > 266) pv->v_counter = y - (lines-256); break; + } + + // Parse sprites for the next line + if (y < lines_vis) + PicoParseSATSMS(y-1); + else if (y > lines-32) + PicoParseSATSMS(y-1-lines); + + // render next line if (y < lines_vis && !skip) - PicoLineMode4(y); + PicoLineSMS(y); + // take over status bits from previously rendered line TODO: cycle exact? + pv->status |= sprites_status; + sprites_status = 0; + + // Interrupt handling. Simulate interrupt flagged and immediately reset in + // same insn by flagging the irq, execute for 1 insn, then checking if the + // irq is still pending. (GG Chicago, SMS Back to the Future III) + pv->pending_ints &= ~2; // lost if not caught in the same line if (y <= lines_vis) { if (--hint < 0) { hint = pv->reg[0x0a]; pv->pending_ints |= 2; - if (pv->reg[0] & 0x10) { + z80_exec(Pico.t.z80c_cnt + 1); + + if ((pv->reg[0] & 0x10) && (pv->pending_ints & 2)) { elprintf(EL_INTS, "hint"); - z80_int(); + z80_int_assert(1); } } } else if (y == lines_vis + 1) { pv->pending_ints |= 1; - if (pv->reg[1] & 0x20) { + z80_exec(Pico.t.z80c_cnt + 1); + + if ((pv->reg[1] & 0x20) && (pv->pending_ints & 1)) { elprintf(EL_INTS, "vint"); - z80_int(); + z80_int_assert(1); } } - cycles_aim += cycles_line; - cycles_done += z80_run((cycles_aim - cycles_done) >> 8) << 8; + z80_exec(Pico.t.z80c_line_start + cycles_line); } - if (PsndOut) - PsndGetSamplesMS(); + z80_resetCycles(); + PsndGetSamplesMS(lines); } void PicoFrameDrawOnlyMS(void) { + struct PicoVideo *pv = &Pico.video; int lines_vis = 192; int y; - PicoFrameStartMode4(); + if ((pv->reg[0] & 6) == 6 && (pv->reg[1] & 0x18)) + lines_vis = (pv->reg[1] & 0x08) ? 
240 : 224; + PicoFrameStartSMS(); - for (y = 0; y < lines_vis; y++) - PicoLineMode4(y); + for (y = 0; y < lines_vis; y++) { + PicoParseSATSMS(y-1); + PicoLineSMS(y); + } } +// vim:ts=2:sw=2:expandtab diff --git a/pico/sound/emu2413 b/pico/sound/emu2413 new file mode 160000 index 00000000..a2dfc20f --- /dev/null +++ b/pico/sound/emu2413 @@ -0,0 +1 @@ +Subproject commit a2dfc20ff507e4fd075cd325620bcea655e2c1f7 diff --git a/pico/sound/mix.c b/pico/sound/mix.c index 636edb55..1a4785d1 100644 --- a/pico/sound/mix.c +++ b/pico/sound/mix.c @@ -1,79 +1,172 @@ /* * some code for sample mixing * (C) notaz, 2006,2007 + * (C) irixxxx, 2019,2020 added filtering * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. */ +#include +#include "../pico_int.h" + #define MAXOUT (+32767) #define MINOUT (-32768) /* limitter */ -#define Limit(val, max,min) { \ - if ( val > max ) val = max; \ - else if ( val < min ) val = min; \ -} +#define Limit16(val) \ + val -= val >> 3; /* reduce level to avoid clipping */ \ + if ((s16)val != val) val = (val < 0 ? MINOUT : MAXOUT) + +int mix_32_to_16_level; + +static struct iir { + int alpha; // alpha for EMA low pass + int y[2]; // filter intermediates +} lfi2, rfi2; + +// NB ">>" rounds to -infinity, "/" to 0. To compensate the effect possibly use +// "-(-y>>n)" (round to +infinity) instead of "y>>n" in places. + +// NB uses fixpoint; samples mustn't have more than (32-QB) bits. Adding the +// outputs of the sound sources together yields a max. of 18 bits, restricting +// QB to a maximum of 14. +#define QB 12 +// NB alpha for DC filtering shouldn't be smaller than 1/(1< 0; count--) - { - l = r = *dest; - l += *src++; - r += *src++; - Limit( l, MAXOUT, MINOUT ); - Limit( r, MAXOUT, MINOUT ); - *dest++ = l; - *dest++ = r; - } + // low pass. alpha is Q8 to avoid loss by 32 bit overflow. 
+// fi2->y[0] += ((x<<(QB-8)) - (fi2->y[0]>>8)) * fi2->alpha; + fi2->y[0] += (x - (fi2->y[0]>>QB)) * fi2->alpha; + // DC filter. for alpha=1-1/8192 cutoff ~1HZ, for 1-1/1024 ~7Hz + fi2->y[1] += (fi2->y[0] - fi2->y[1]) >> QB; + return (fi2->y[0] - fi2->y[1]) >> QB; } +// exponential moving average filter for DC filtering +// y[n] = (x[n]-y[n-1])*(1-1/8192) (corner approx. 1Hz, gain 1) +static inline int filter_exp(struct iir *fi2, int x) +{ + fi2->y[1] += ((x << QB) - fi2->y[1]) >> QB; + return x - (fi2->y[1] >> QB); +} -void mix_32_to_16_mono(short *dest, int *src, int count) +// unfiltered (for testing) +static inline int filter_null(struct iir *fi2, int x) +{ + return x; +} + +#define filter filter_band + +#define mix_32_to_16_stereo_core(dest, src, count, lv, fl) { \ + int l, r; \ + struct iir lf = lfi2, rf = rfi2; \ + \ + for (; count > 0; count--) \ + { \ + l = *dest; \ + l += *src++ >> lv; \ + l = fl(&lf, l); \ + Limit16(l); \ + *dest++ = l; \ + r = *dest; \ + r += *src++ >> lv; \ + r = fl(&rf, r); \ + Limit16(r); \ + *dest++ = r; \ + } \ + lfi2 = lf, rfi2 = rf; \ +} + +void mix_32_to_16_stereo_lvl(s16 *dest, s32 *src, int count) +{ + mix_32_to_16_stereo_core(dest, src, count, mix_32_to_16_level, filter); +} + +void mix_32_to_16_stereo(s16 *dest, s32 *src, int count) +{ + mix_32_to_16_stereo_core(dest, src, count, 0, filter); +} + +void mix_32_to_16_mono(s16 *dest, s32 *src, int count) { int l; + struct iir lf = lfi2; for (; count > 0; count--) { l = *dest; l += *src++; - Limit( l, MAXOUT, MINOUT ); + l = filter(&lf, l); + Limit16(l); *dest++ = l; } + lfi2 = lf; } -void mix_16h_to_32(int *dest_buf, short *mp3_buf, int count) +void mix_16h_to_32(s32 *dest_buf, s16 *mp3_buf, int count) { while (count--) { - *dest_buf++ += *mp3_buf++ >> 1; + *dest_buf++ += (*mp3_buf++ * 5) >> 3; } } -void mix_16h_to_32_s1(int *dest_buf, short *mp3_buf, int count) +void mix_16h_to_32_s1(s32 *dest_buf, s16 *mp3_buf, int count) { count >>= 1; while (count--) { - *dest_buf++ += 
*mp3_buf++ >> 1; - *dest_buf++ += *mp3_buf++ >> 1; + *dest_buf++ += (*mp3_buf++ * 5) >> 3; + *dest_buf++ += (*mp3_buf++ * 5) >> 3; mp3_buf += 1*2; } } -void mix_16h_to_32_s2(int *dest_buf, short *mp3_buf, int count) +void mix_16h_to_32_s2(s32 *dest_buf, s16 *mp3_buf, int count) { count >>= 1; while (count--) { - *dest_buf++ += *mp3_buf++ >> 1; - *dest_buf++ += *mp3_buf++ >> 1; + *dest_buf++ += (*mp3_buf++ * 5) >> 3; + *dest_buf++ += (*mp3_buf++ * 5) >> 3; mp3_buf += 3*2; } } +// mixes cdda audio @44.1 KHz into dest_buf, resampling with nearest neighbour +void mix_16h_to_32_resample_stereo(s32 *dest_buf, s16 *cdda_buf, int count, int fac16) +{ + int pos16 = 0; + while (count--) { + int pos = 2 * (pos16>>16); + *dest_buf++ += (cdda_buf[pos ] * 5) >> 3; + *dest_buf++ += (cdda_buf[pos+1] * 5) >> 3; + pos16 += fac16; + } +} + +// mixes cdda audio @44.1 KHz into dest_buf, resampling with nearest neighbour +void mix_16h_to_32_resample_mono(s32 *dest_buf, s16 *cdda_buf, int count, int fac16) +{ + int pos16 = 0; + while (count--) { + int pos = 2 * (pos16>>16); + *dest_buf += (cdda_buf[pos ] * 5) >> 4; + *dest_buf++ += (cdda_buf[pos+1] * 5) >> 4; + pos16 += fac16; + } +} + +void mix_reset(int alpha_q16) +{ + memset(&lfi2, 0, sizeof(lfi2)); + memset(&rfi2, 0, sizeof(rfi2)); + lfi2.alpha = rfi2.alpha = (0x10000-alpha_q16) >> 4; // filter alpha, Q12 +} diff --git a/pico/sound/mix.h b/pico/sound/mix.h index b9315114..43fb52a5 100644 --- a/pico/sound/mix.h +++ b/pico/sound/mix.h @@ -1,10 +1,14 @@ //void mix_32_to_32(int *dest, int *src, int count); -void mix_16h_to_32(int *dest, short *src, int count); -void mix_16h_to_32_s1(int *dest, short *src, int count); -void mix_16h_to_32_s2(int *dest, short *src, int count); -void mix_32_to_16l_stereo(short *dest, int *src, int count); -void mix_32_to_16_mono(short *dest, int *src, int count); +void mix_16h_to_32(s32 *dest, s16 *src, int count); +void mix_16h_to_32_s1(s32 *dest, s16 *src, int count); +void mix_16h_to_32_s2(s32 *dest, s16 
*src, int count); -extern int mix_32_to_16l_level; -void mix_32_to_16l_stereo_lvl(short *dest, int *src, int count); +void mix_16h_to_32_resample_stereo(s32 *dest, s16 *src, int count, int fac16); +void mix_16h_to_32_resample_mono(s32 *dest, s16 *src, int count, int fac16); +void mix_32_to_16_stereo(s16 *dest, s32 *src, int count); +void mix_32_to_16_mono(s16 *dest, s32 *src, int count); + +extern int mix_32_to_16_level; +void mix_32_to_16_stereo_lvl(s16 *dest, s32 *src, int count); +void mix_reset(int alpha_q16); diff --git a/pico/sound/mix_arm.S b/pico/sound/mix_arm.S new file mode 100644 index 00000000..64383c3a --- /dev/null +++ b/pico/sound/mix_arm.S @@ -0,0 +1,606 @@ +/* + * Generic routines for mixing audio samples + * (C) notaz, 2007,2008 + * (C) irixxxx, 2019,2020 added filtering + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ + +.text +.align 4 + +@ this assumes src is word aligned +.global mix_16h_to_32 @ int *dest, short *src, int count + +mix_16h_to_32: + stmfd sp!, {r4-r6,lr} +/* + tst r1, #2 + beq m16_32_mo_unalw + ldrsh r4, [r1], #2 + ldr r3, [r0] + sub r2, r2, #1 + add r3, r3, r4, asr #1 + str r3, [r0], #4 +*/ +m16_32_mo_unalw: + subs r2, r2, #4 + bmi m16_32_end + +m16_32_loop: + ldmia r0, {r3-r6} + ldmia r1!,{r12,lr} + subs r2, r2, #4 + add r4, r4, r12,asr #17 @ we use 5/8 volume + add r4, r4, r12,asr #19 + mov r12,r12,lsl #16 + add r3, r3, r12,asr #17 + add r3, r3, r12,asr #19 + add r6, r6, lr, asr #17 + add r6, r6, lr, asr #19 + mov lr, lr, lsl #16 + add r5, r5, lr, asr #17 + add r5, r5, lr, asr #19 + stmia r0!,{r3-r6} + bpl m16_32_loop + +m16_32_end: + tst r2, #2 + beq m16_32_no_unal2 + ldr r5, [r1], #4 + ldmia r0, {r3,r4} + mov r12,r5, lsl #16 + add r3, r3, r12,asr #17 + add r3, r3, r12,asr #19 + add r4, r4, r5, asr #17 + add r4, r4, r5, asr #19 + stmia r0!,{r3,r4} + +m16_32_no_unal2: + tst r2, #1 + ldmeqfd sp!, {r4-r6,pc} + ldrsh r4, [r1], #2 + ldr r3, [r0] + add r3, r3, r4, 
asr #1 + add r3, r3, r4, asr #3 + str r3, [r0], #4 + + ldmfd sp!, {r4-r6,lr} + bx lr + + + +.global mix_16h_to_32_s1 @ int *dest, short *src, int count + +mix_16h_to_32_s1: + stmfd sp!, {r4-r6,lr} + + subs r2, r2, #4 + bmi m16_32_s1_end + +m16_32_s1_loop: + ldmia r0, {r3-r6} + ldr r12,[r1], #8 + ldr lr, [r1], #8 + subs r2, r2, #4 + add r4, r4, r12,asr #17 @ we use 5/8 volume + add r4, r4, r12,asr #19 + mov r12,r12,lsl #16 + add r3, r3, r12,asr #17 + add r3, r3, r12,asr #19 + add r6, r6, lr, asr #17 + add r6, r6, lr, asr #19 + mov lr, lr, lsl #16 + add r5, r5, lr, asr #17 + add r5, r5, lr, asr #19 + stmia r0!,{r3-r6} + bpl m16_32_s1_loop + +m16_32_s1_end: + tst r2, #2 + beq m16_32_s1_no_unal2 + ldr r5, [r1], #8 + ldmia r0, {r3,r4} + mov r12,r5, lsl #16 + add r3, r3, r12,asr #17 + add r3, r3, r12,asr #19 + add r4, r4, r5, asr #17 + add r4, r4, r5, asr #19 + stmia r0!,{r3,r4} + +m16_32_s1_no_unal2: + tst r2, #1 + ldmeqfd sp!, {r4-r6,pc} + ldrsh r4, [r1], #2 + ldr r3, [r0] + add r3, r3, r4, asr #1 + add r3, r3, r4, asr #3 + str r3, [r0], #4 + + ldmfd sp!, {r4-r6,lr} + bx lr + + + +.global mix_16h_to_32_s2 @ int *dest, short *src, int count + +mix_16h_to_32_s2: + stmfd sp!, {r4-r6,lr} + + subs r2, r2, #4 + bmi m16_32_s2_end + +m16_32_s2_loop: + ldmia r0, {r3-r6} + ldr r12,[r1], #16 + ldr lr, [r1], #16 + subs r2, r2, #4 + add r4, r4, r12,asr #17 @ we use 5/8 volume + add r4, r4, r12,asr #19 + mov r12,r12,lsl #16 + add r3, r3, r12,asr #17 + add r3, r3, r12,asr #19 + add r6, r6, lr, asr #17 + add r6, r6, lr, asr #19 + mov lr, lr, lsl #16 + add r5, r5, lr, asr #17 + add r5, r5, lr, asr #19 + stmia r0!,{r3-r6} + bpl m16_32_s2_loop + +m16_32_s2_end: + tst r2, #2 + beq m16_32_s2_no_unal2 + ldr r5, [r1], #16 + ldmia r0, {r3,r4} + mov r12,r5, lsl #16 + add r3, r3, r12,asr #17 + add r3, r3, r12,asr #19 + add r4, r4, r5, asr #17 + add r4, r4, r5, asr #19 + stmia r0!,{r3,r4} + +m16_32_s2_no_unal2: + tst r2, #1 + ldmeqfd sp!, {r4-r6,pc} + ldrsh r4, [r1], #2 + ldr r3, [r0] + add r3, 
r3, r4, asr #1 + add r3, r3, r4, asr #3 + str r3, [r0], #4 + + ldmfd sp!, {r4-r6,lr} + bx lr + + + +.global mix_16h_to_32_resample_stereo @ int *dest, short *src, int count, int fac16 + +mix_16h_to_32_resample_stereo: + stmfd sp!, {r4-r9,lr} + + subs r2, r2, #2 + mov r4, #0 + bmi m16_32_rss_end + +m16_32_rss_loop: + ldmia r0, {r5-r8} + mov r9, r4, lsr #16 + ldr r12,[r1, r9, lsl #2] + add r4, r4, r3 + mov r9, r4, lsr #16 + ldr lr ,[r1, r9, lsl #2] + add r4, r4, r3 + subs r2, r2, #2 + add r6, r6, r12,asr #17 @ we use 5/8 volume + add r6, r6, r12,asr #19 + mov r12,r12,lsl #16 + add r5, r5, r12,asr #17 + add r5, r5, r12,asr #19 + add r8, r8, lr, asr #17 + add r8, r8, lr, asr #19 + mov lr, lr, lsl #16 + add r7, r7, lr, asr #17 + add r7, r7, lr, asr #19 + stmia r0!,{r5-r8} + bpl m16_32_rss_loop + +m16_32_rss_end: + tst r2, #1 + ldmeqfd sp!, {r4-r9,pc} + mov r9, r4, lsr #16 + ldr lr ,[r1, r9, lsl #2] + ldmia r0, {r5,r6} + mov r12,lr, lsl #16 + add r5, r5, r12,asr #17 + add r5, r5, r12,asr #19 + add r6, r6, lr, asr #17 + add r6, r6, lr, asr #19 + stmia r0!,{r5,r6} + + ldmfd sp!, {r4-r9,lr} + bx lr + + + +.global mix_16h_to_32_resample_mono @ int *dest, short *src, int count, int fac16 + +mix_16h_to_32_resample_mono: + stmfd sp!, {r4-r6,r9,lr} + + subs r2, r2, #2 + mov r4, #0 + bmi m16_32_rsm_end + +m16_32_rsm_loop: + ldmia r0, {r5-r6} + mov r9, r4, lsr #16 + ldr r12,[r1, r9, lsl #2] + add r4, r4, r3 + mov r9, r4, lsr #16 + ldr lr ,[r1, r9, lsl #2] + add r4, r4, r3 + subs r2, r2, #2 + add r5, r5, r12,asr #18 @ we use 5/8 volume (= 5/16 vol per channel) + add r5, r5, r12,asr #20 + mov r12,r12,lsl #16 + add r5, r5, r12,asr #18 + add r5, r5, r12,asr #20 + add r6, r6, lr, asr #18 + add r6, r6, lr, asr #20 + mov lr, lr, lsl #16 + add r6, r6, lr, asr #18 + add r6, r6, lr, asr #20 + stmia r0!,{r5-r6} + bpl m16_32_rsm_loop + +m16_32_rsm_end: + tst r2, #1 + ldmeqfd sp!, {r4-r6,r9,pc} + mov r9, r4, lsr #16 + ldr lr ,[r1, r9, lsl #2] + ldr r5, [r0] + mov r12,lr, lsl #16 + add r5, r5, 
r12,asr #18 + add r5, r5, r12,asr #20 + add r5, r5, lr, asr #18 + add r5, r5, lr, asr #20 + str r5, [r0] + + ldmfd sp!, {r4-r6,r9,lr} + bx lr + + + +@ limit +@ reg=int_sample, r12=1, r8=tmp, kills flags +.macro Limit reg + sub \reg, \reg, \reg, asr #3 @ reduce audio lvl some to avoid clipping + add r8, r12, \reg, asr #15 + bics r8, r8, #1 @ in non-overflow conditions r8 is 0 or 1 + movne \reg, #0x8000 + subpl \reg, \reg, #1 +.endm + +@ limit and shift up by 16 +@ reg=int_sample, r12=1, r8=tmp, kills flags +.macro Limitsh reg + sub \reg, \reg, \reg, asr #3 @ reduce audio lvl some to avoid clipping + add r8, r12,\reg, asr #15 + bics r8, r8, #1 @ in non-overflow conditions r8 is 0 or 1 + moveq \reg, \reg, lsl #16 + movne \reg, #0x80000000 + subpl \reg, \reg, #0x00010000 +.endm + + +@ filter out DC offset +@ in=int_sample (max 20 bit), y=filter memory, r8=tmp +.macro DCfilt in y + rsb r8, \y, \in, lsl #12 @ fixpoint 20.12 + add \y, \y, r8, asr #12 @ alpha = 1-1/4096 + sub \in, \in, \y, asr #12 +.endm + +@ lowpass filter +@ in=int_sample (max 20 bit), y=filter memory, r12=alpha(Q12), r8=tmp +.macro LPfilt in y +@ mov r8, \y, asr #8 +@ rsb r8, r8, \in, lsl #4 @ fixpoint 20.12 + sub r8, \in, \y, asr #12 @ fixpoint 20.12 + mla \y, r8, r12, \y + mov \in, \y, asr #12 +.endm + + +@ mix 32bit audio (with 16bits really used, upper bits indicate overflow) with normal 16 bit audio with left channel only +@ warning: this function assumes dest is word aligned +.global mix_32_to_16_stereo @ short *dest, int *src, int count + +mix_32_to_16_stereo: + stmfd sp!, {r4-r8,r10-r11,lr} + + mov r2, r2, lsl #1 + subs r2, r2, #4 + bmi m32_16_st_end + + ldr r12, =filter + ldr r8, [r12], #4 + ldmia r12, {r3,r10-r11,lr} + str r8, [sp, #-4]! 
+ +m32_16_st_loop: + ldmia r0, {r8,r12} + ldmia r1!, {r4-r7} + add r5, r5, r8, asr #16 + add r7, r7, r12,asr #16 + mov r8, r8, lsl #16 + mov r12,r12,lsl #16 + add r4, r4, r8, asr #16 + add r6, r6, r12,asr #16 + ldr r12,[sp] + LPfilt r4, r3 + LPfilt r5, lr + LPfilt r6, r3 + LPfilt r7, lr + DCfilt r4, r10 + DCfilt r5, r11 + DCfilt r6, r10 + DCfilt r7, r11 + mov r12,#1 + Limitsh r4 + Limitsh r5 + Limitsh r6 + Limitsh r7 + subs r2, r2, #4 + orr r4, r5, r4, lsr #16 + orr r5, r7, r6, lsr #16 + stmia r0!, {r4,r5} + bpl m32_16_st_loop + +m32_16_st_end: + @ check for remaining bytes to convert + tst r2, #2 + beq m32_16_st_no_unal2 + ldr r6, [r0] + ldmia r1!,{r4,r5} + add r5, r5, r6, asr #16 + mov r6, r6, lsl #16 + add r4, r4, r6, asr #16 + ldr r12,[sp] + LPfilt r4, r3 + LPfilt r5, lr + DCfilt r4, r10 + DCfilt r5, r11 + mov r12,#1 + Limitsh r4 + Limitsh r5 + orr r4, r5, r4, lsr #16 + str r4, [r0], #4 + +m32_16_st_no_unal2: + ldr r12, =filter + add r12,r12, #4 + stmia r12, {r3,r10-r11,lr} + add sp, sp, #4 + ldmfd sp!, {r4-r8,r10-r11,lr} + bx lr + + +@ mix 32bit audio (with 16bits really used, upper bits indicate overflow) with normal 16 bit audio (for mono sound) +.global mix_32_to_16_mono @ short *dest, int *src, int count + +mix_32_to_16_mono: + stmfd sp!, {r4-r8,r10-r11,lr} + + ldr r12, =filter + ldr r8, [r12], #4 + ldmia r12, {r10-r11} + str r8, [sp, #-4]! 
+ + @ check if dest is word aligned + tst r0, #2 + beq m32_16_mo_no_unalw + ldrsh r5, [r0] + ldr r4, [r1], #4 + sub r2, r2, #1 + add r4, r4, r5 + ldr r12,[sp] + LPfilt r4, r11 + DCfilt r4, r10 + mov r12,#1 + Limit r4 + strh r4, [r0], #2 + +m32_16_mo_no_unalw: + subs r2, r2, #4 + bmi m32_16_mo_end + +m32_16_mo_loop: + ldmia r0, {r8,r12} + ldmia r1!, {r4-r7} + add r5, r5, r8, asr #16 + add r7, r7, r12,asr #16 + mov r8, r8, lsl #16 + mov r12,r12,lsl #16 + add r4, r4, r8, asr #16 + add r6, r6, r12,asr #16 + ldr r12,[sp] + LPfilt r4, r11 + LPfilt r5, r11 + LPfilt r6, r11 + LPfilt r7, r11 + DCfilt r4, r10 + DCfilt r5, r10 + DCfilt r6, r10 + DCfilt r7, r10 + mov r12,#1 + Limitsh r4 + Limitsh r5 + Limitsh r6 + Limitsh r7 + subs r2, r2, #4 + orr r4, r5, r4, lsr #16 + orr r5, r7, r6, lsr #16 + stmia r0!, {r4,r5} + bpl m32_16_mo_loop + +m32_16_mo_end: + @ check for remaining bytes to convert + tst r2, #2 + beq m32_16_mo_no_unal2 + ldr r6, [r0] + ldmia r1!,{r4,r5} + add r5, r5, r6, asr #16 + mov r6, r6, lsl #16 + add r4, r4, r6, asr #16 + ldr r12,[sp] + LPfilt r4, r11 + LPfilt r5, r11 + DCfilt r4, r10 + DCfilt r5, r10 + mov r12,#1 + Limitsh r4 + Limitsh r5 + orr r4, r5, r4, lsr #16 + str r4, [r0], #4 + +m32_16_mo_no_unal2: + tst r2, #1 + beq m32_16_mo_no_unal + ldrsh r5, [r0] + ldr r4, [r1], #4 + add r4, r4, r5 + ldr r12,[sp] + LPfilt r4, r11 + DCfilt r4, r10 + mov r12,#1 + Limit r4 + strh r4, [r0], #2 + +m32_16_mo_no_unal: + ldr r12, =filter + add r12,r12, #4 + stmia r12, {r10-r11} + add sp, sp, #4 + ldmfd sp!, {r4-r8,r10-r11,lr} + bx lr + + +#ifdef __GP2X__ + +.data +.align 4 + +.global mix_32_to_16_level +mix_32_to_16_level: + .word 0 + +.text +.align 4 + +@ same as mix_32_to_16_stereo, but with additional shift +.global mix_32_to_16_stereo_lvl @ short *dest, int *src, int count + +mix_32_to_16_stereo_lvl: + stmfd sp!, {r4-r11,lr} + + ldr r9, =mix_32_to_16_level + mov lr, #1 + ldr r9, [r9] + ldr r12, =filter + ldr r8, [r12], #4 + ldmia r12, {r3,r10-r11,lr} + str r8, [sp, 
#-4]! + + mov r2, r2, lsl #1 + subs r2, r2, #4 + bmi m32_16_st_l_end + +m32_16_st_l_loop: + ldmia r0, {r8,r12} + ldmia r1!, {r4-r7} + add r5, r5, r8, asr #16 + add r7, r7, r12,asr #16 + mov r8, r8, lsl #16 + mov r12,r12,lsl #16 + add r4, r4, r8, asr #16 + add r6, r6, r12,asr #16 + mov r4, r4, asr r9 + mov r5, r5, asr r9 + mov r6, r6, asr r9 + mov r7, r7, asr r9 + ldr r12,[sp] + LPfilt r4, r3 + LPfilt r5, lr + LPfilt r6, r3 + LPfilt r7, lr + DCfilt r4, r10 + DCfilt r5, r11 + DCfilt r6, r10 + DCfilt r7, r11 + mov r12,#1 + Limitsh r4 + Limitsh r5 + Limitsh r6 + Limitsh r7 + subs r2, r2, #4 + orr r4, r5, r4, lsr #16 + orr r5, r7, r6, lsr #16 + stmia r0!, {r4,r5} + bpl m32_16_st_l_loop + +m32_16_st_l_end: + @ check for remaining bytes to convert + tst r2, #2 + beq m32_16_st_l_no_unal2 + ldr r6, [r0] + ldmia r1!,{r4,r5} + add r5, r5, r6, asr #16 + mov r6, r6, lsl #16 + add r4, r4, r6, asr #16 + mov r4, r4, asr r9 + mov r5, r5, asr r9 + ldr r12,[sp] + LPfilt r4, r3 + LPfilt r5, lr + DCfilt r4, r10 + DCfilt r5, r11 + mov r12,#1 + Limitsh r4 + Limitsh r5 + orr r4, r5, r4, lsr #16 + str r4, [r0], #4 + +m32_16_st_l_no_unal2: + ldr r12, =filter + add r12,r12, #4 + stmia r12, {r3,r10-r11,lr} + add sp, sp, #4 + ldmfd sp!, {r4-r11,lr} + bx lr + +#endif /* __GP2X__ */ + +.global mix_reset @ int alpha_q16 +mix_reset: + ldr r2, =filter + rsb r0, r0, #0x10000 +@ mov r0, r0, asr #8 + mov r0, r0, asr #4 + str r0, [r2], #4 + mov r1, #0 + str r1, [r2], #4 + str r1, [r2], #4 + str r1, [r2], #4 + str r1, [r2], #4 + bx lr + +.data +filter: + .ds 4 @ alpha_q8 + .ds 8 @ filter history for left channel + .ds 8 @ filter history for right channel + +@ vim:filetype=armasm diff --git a/pico/sound/mix_arm.s b/pico/sound/mix_arm.s deleted file mode 100644 index 60a09495..00000000 --- a/pico/sound/mix_arm.s +++ /dev/null @@ -1,369 +0,0 @@ -/* - * Generic routines for mixing audio samples - * (C) notaz, 2007,2008 - * - * This work is licensed under the terms of MAME license. 
- * See COPYING file in the top-level directory. - */ - -.text -.align 4 - -@ this assumes src is word aligned -.global mix_16h_to_32 @ int *dest, short *src, int count - -mix_16h_to_32: - stmfd sp!, {r4-r6,lr} -/* - tst r1, #2 - beq m16_32_mo_unalw - ldrsh r4, [r1], #2 - ldr r3, [r0] - sub r2, r2, #1 - add r3, r3, r4, asr #1 - str r3, [r0], #4 -*/ -m16_32_mo_unalw: - subs r2, r2, #4 - bmi m16_32_end - -m16_32_loop: - ldmia r0, {r3-r6} - ldmia r1!,{r12,lr} - subs r2, r2, #4 - add r4, r4, r12,asr #17 @ we use half volume - mov r12,r12,lsl #16 - add r3, r3, r12,asr #17 - add r6, r6, lr, asr #17 - mov lr, lr, lsl #16 - add r5, r5, lr, asr #17 - stmia r0!,{r3-r6} - bpl m16_32_loop - -m16_32_end: - tst r2, #2 - beq m16_32_no_unal2 - ldr r5, [r1], #4 - ldmia r0, {r3,r4} - mov r12,r5, lsl #16 - add r3, r3, r12,asr #17 - add r4, r4, r5, asr #17 - stmia r0!,{r3,r4} - -m16_32_no_unal2: - tst r2, #1 - ldmeqfd sp!, {r4-r6,pc} - ldrsh r4, [r1], #2 - ldr r3, [r0] - add r3, r3, r4, asr #1 - str r3, [r0], #4 - - ldmfd sp!, {r4-r6,lr} - bx lr - - - -.global mix_16h_to_32_s1 @ int *dest, short *src, int count - -mix_16h_to_32_s1: - stmfd sp!, {r4-r6,lr} - - subs r2, r2, #4 - bmi m16_32_s1_end - -m16_32_s1_loop: - ldmia r0, {r3-r6} - ldr r12,[r1], #8 - ldr lr, [r1], #8 - subs r2, r2, #4 - add r4, r4, r12,asr #17 - mov r12,r12,lsl #16 - add r3, r3, r12,asr #17 @ we use half volume - add r6, r6, lr, asr #17 - mov lr, lr, lsl #16 - add r5, r5, lr, asr #17 - stmia r0!,{r3-r6} - bpl m16_32_s1_loop - -m16_32_s1_end: - tst r2, #2 - beq m16_32_s1_no_unal2 - ldr r5, [r1], #8 - ldmia r0, {r3,r4} - mov r12,r5, lsl #16 - add r3, r3, r12,asr #17 - add r4, r4, r5, asr #17 - stmia r0!,{r3,r4} - -m16_32_s1_no_unal2: - tst r2, #1 - ldmeqfd sp!, {r4-r6,pc} - ldrsh r4, [r1], #2 - ldr r3, [r0] - add r3, r3, r4, asr #1 - str r3, [r0], #4 - - ldmfd sp!, {r4-r6,lr} - bx lr - - - -.global mix_16h_to_32_s2 @ int *dest, short *src, int count - -mix_16h_to_32_s2: - stmfd sp!, {r4-r6,lr} - - subs r2, r2, #4 - 
bmi m16_32_s2_end - -m16_32_s2_loop: - ldmia r0, {r3-r6} - ldr r12,[r1], #16 - ldr lr, [r1], #16 - subs r2, r2, #4 - add r4, r4, r12,asr #17 - mov r12,r12,lsl #16 - add r3, r3, r12,asr #17 @ we use half volume - add r6, r6, lr, asr #17 - mov lr, lr, lsl #16 - add r5, r5, lr, asr #17 - stmia r0!,{r3-r6} - bpl m16_32_s2_loop - -m16_32_s2_end: - tst r2, #2 - beq m16_32_s2_no_unal2 - ldr r5, [r1], #16 - ldmia r0, {r3,r4} - mov r12,r5, lsl #16 - add r3, r3, r12,asr #17 - add r4, r4, r5, asr #17 - stmia r0!,{r3,r4} - -m16_32_s2_no_unal2: - tst r2, #1 - ldmeqfd sp!, {r4-r6,pc} - ldrsh r4, [r1], #2 - ldr r3, [r0] - add r3, r3, r4, asr #1 - str r3, [r0], #4 - - ldmfd sp!, {r4-r6,lr} - bx lr - - - -@ limit -@ reg=int_sample, lr=1, r3=tmp, kills flags -.macro Limit reg - add r3, lr, \reg, asr #15 - bics r3, r3, #1 @ in non-overflow conditions r3 is 0 or 1 - movne \reg, #0x8000 - subpl \reg, \reg, #1 -.endm - - -@ limit and shift up by 16 -@ reg=int_sample, lr=1, r3=tmp, kills flags -.macro Limitsh reg -@ movs r4, r3, asr #16 -@ cmnne r4, #1 -@ beq c32_16_no_overflow -@ tst r4, r4 -@ mov r3, #0x8000 -@ subpl r3, r3, #1 - - add r3, lr, \reg, asr #15 - bics r3, r3, #1 @ in non-overflow conditions r3 is 0 or 1 - moveq \reg, \reg, lsl #16 - movne \reg, #0x80000000 - subpl \reg, \reg, #0x00010000 -.endm - - -@ mix 32bit audio (with 16bits really used, upper bits indicate overflow) with normal 16 bit audio with left channel only -@ warning: this function assumes dest is word aligned -.global mix_32_to_16l_stereo @ short *dest, int *src, int count - -mix_32_to_16l_stereo: - stmfd sp!, {r4-r8,lr} - - mov lr, #1 - - mov r2, r2, lsl #1 - subs r2, r2, #4 - bmi m32_16l_st_end - -m32_16l_st_loop: - ldmia r0, {r8,r12} - ldmia r1!, {r4-r7} - mov r8, r8, lsl #16 - mov r12,r12,lsl #16 - add r4, r4, r8, asr #16 - add r5, r5, r8, asr #16 - add r6, r6, r12,asr #16 - add r7, r7, r12,asr #16 - Limitsh r4 - Limitsh r5 - Limitsh r6 - Limitsh r7 - subs r2, r2, #4 - orr r4, r5, r4, lsr #16 - orr r5, 
r7, r6, lsr #16 - stmia r0!, {r4,r5} - bpl m32_16l_st_loop - -m32_16l_st_end: - @ check for remaining bytes to convert - tst r2, #2 - beq m32_16l_st_no_unal2 - ldrsh r6, [r0] - ldmia r1!,{r4,r5} - add r4, r4, r6 - add r5, r5, r6 - Limitsh r4 - Limitsh r5 - orr r4, r5, r4, lsr #16 - str r4, [r0], #4 - -m32_16l_st_no_unal2: - ldmfd sp!, {r4-r8,lr} - bx lr - - -@ mix 32bit audio (with 16bits really used, upper bits indicate overflow) with normal 16 bit audio (for mono sound) -.global mix_32_to_16_mono @ short *dest, int *src, int count - -mix_32_to_16_mono: - stmfd sp!, {r4-r8,lr} - - mov lr, #1 - - @ check if dest is word aligned - tst r0, #2 - beq m32_16_mo_no_unalw - ldrsh r5, [r0] - ldr r4, [r1], #4 - sub r2, r2, #1 - add r4, r4, r5 - Limit r4 - strh r4, [r0], #2 - -m32_16_mo_no_unalw: - subs r2, r2, #4 - bmi m32_16_mo_end - -m32_16_mo_loop: - ldmia r0, {r8,r12} - ldmia r1!, {r4-r7} - add r5, r5, r8, asr #16 - mov r8, r8, lsl #16 - add r4, r4, r8, asr #16 - add r7, r7, r12,asr #16 - mov r12,r12,lsl #16 - add r6, r6, r12,asr #16 - Limitsh r4 - Limitsh r5 - Limitsh r6 - Limitsh r7 - subs r2, r2, #4 - orr r4, r5, r4, lsr #16 - orr r5, r7, r6, lsr #16 - stmia r0!, {r4,r5} - bpl m32_16_mo_loop - -m32_16_mo_end: - @ check for remaining bytes to convert - tst r2, #2 - beq m32_16_mo_no_unal2 - ldr r6, [r0] - ldmia r1!,{r4,r5} - add r5, r5, r6, asr #16 - mov r6, r6, lsl #16 - add r4, r4, r6, asr #16 - Limitsh r4 - Limitsh r5 - orr r4, r5, r4, lsr #16 - str r4, [r0], #4 - -m32_16_mo_no_unal2: - tst r2, #1 - ldmeqfd sp!, {r4-r8,pc} - ldrsh r5, [r0] - ldr r4, [r1], #4 - add r4, r4, r5 - Limit r4 - strh r4, [r0], #2 - - ldmfd sp!, {r4-r8,lr} - bx lr - - - -.data -.align 4 - -.global mix_32_to_16l_level -mix_32_to_16l_level: - .word 0 - -.text -.align 4 - -@ same as mix_32_to_16l_stereo, but with additional shift -.global mix_32_to_16l_stereo_lvl @ short *dest, int *src, int count - -mix_32_to_16l_stereo_lvl: - stmfd sp!, {r4-r9,lr} - - ldr r9, =mix_32_to_16l_level - mov lr, #1 
- ldr r9, [r9] - - mov r2, r2, lsl #1 - subs r2, r2, #4 - bmi m32_16l_st_l_end - -m32_16l_st_l_loop: - ldmia r0, {r8,r12} - ldmia r1!, {r4-r7} - mov r8, r8, lsl #16 - mov r12,r12,lsl #16 - add r4, r4, r8, asr #16 - add r5, r5, r8, asr #16 - add r6, r6, r12,asr #16 - add r7, r7, r12,asr #16 - mov r4, r4, asr r9 - mov r5, r5, asr r9 - mov r6, r6, asr r9 - mov r7, r7, asr r9 - Limitsh r4 - Limitsh r5 - Limitsh r6 - Limitsh r7 - subs r2, r2, #4 - orr r4, r5, r4, lsr #16 - orr r5, r7, r6, lsr #16 - stmia r0!, {r4,r5} - bpl m32_16l_st_l_loop - -m32_16l_st_l_end: - @ check for remaining bytes to convert - tst r2, #2 - beq m32_16l_st_l_no_unal2 - ldrsh r6, [r0] - ldmia r1!,{r4,r5} - add r4, r4, r6 - add r5, r5, r6 - mov r4, r4, asr r9 - mov r5, r5, asr r9 - Limitsh r4 - Limitsh r5 - orr r4, r5, r4, lsr #16 - str r4, [r0], #4 - -m32_16l_st_l_no_unal2: - ldmfd sp!, {r4-r9,lr} - bx lr - -@ vim:filetype=armasm diff --git a/pico/sound/resampler.c b/pico/sound/resampler.c new file mode 100644 index 00000000..8fc0a785 --- /dev/null +++ b/pico/sound/resampler.c @@ -0,0 +1,267 @@ +/* Configurable fixed point resampling SINC filter for mono and stereo audio. + * + * (C) 2022 irixxxx + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - MAME license. + * See COPYING file in the top-level directory. 
+ */ + + +/* SINC filter generation taken from the blipper library, its license is: + * + * Copyright (C) 2013 - Hans-Kristian Arntzen + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and + * associated documentation files (the "Software"), + * to deal in the Software without restriction, + * including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#include +#include +#include +#include + +#include "../pico_types.h" +#include "resampler.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +static double besseli0(double x) +{ + unsigned i; + double sum = 0.0; + + double factorial = 1.0; + double factorial_mult = 0.0; + double x_pow = 1.0; + double two_div_pow = 1.0; + double x_sqr = x * x; + + /* Approximate. This is an infinite sum. + * Luckily, it converges rather fast. 
*/ + for (i = 0; i < 18; i++) + { + sum += x_pow * two_div_pow / (factorial * factorial); + + factorial_mult += 1.0; + x_pow *= x_sqr; + two_div_pow *= 0.25; + factorial *= factorial_mult; + } + + return sum; +} + +static double sinc(double v) +{ + if (fabs(v) < 0.00001) + return 1.0; + else + return sin(v) / v; +} + +/* index range = [-1, 1) */ +static double kaiser_window(double index, double beta) +{ + return besseli0(beta * sqrt(1.0 - index * index)); +} + +/* Creates a polyphase SINC filter (:phases banks with :taps each) + * Interleaves the filter for cache coherency and possibilities for SIMD */ +static s16 *create_sinc(unsigned phases, unsigned taps, double cutoff, double beta) +{ + unsigned i, filter_len; + double sidelobes, window_mod, window_phase, sinc_phase; + s16 *filter; + double tap; + + filter = (s16*)malloc(phases * taps * sizeof(*filter)); + if (!filter) + return NULL; + + sidelobes = taps / 2.0; + window_mod = 1.0 / kaiser_window(0.0, beta); + filter_len = phases * taps; + + for (i = 0; i < filter_len; i++) + { + window_phase = (double)i / filter_len; /* [0, 1) */ + window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */ + sinc_phase = window_phase * sidelobes; /* [-taps / 2, taps / 2) */ + + tap = (cutoff * sinc(M_PI * sinc_phase * cutoff) * + kaiser_window(window_phase, beta) * window_mod); + /* assign taking filter bank interleaving into account: + * :phases banks of length :taps */ + filter[(i%phases)*taps + (i/phases)] = tap * 0x7fff + 0.5; + } + + return filter; +} + +/* Public interface */ + +/* Release a resampler */ +void resampler_free(resampler_t *rs) +{ + if (rs) + { + free(rs->buffer); + free(rs->filter); + free(rs); + } +} + +/* Create a resampler with upsampling factor :interpolation and downsampling + * factor :decimation, Kaiser windowed SINC polyphase FIR with bank size :taps. + * The created filter has a size of :taps*:interpolation for upsampling and + * :taps*:decimation for downsampling. 
:taps is limiting the cost per sample and + * should be big enough to avoid inaccuracy (>= 8, higher is more accurate). + * :cutoff is in [0..1] with 1 representing the Nyquist rate after decimation. + * :beta is the Kaiser window beta. + * :max_input is the maximum length in a resampler_update call */ +resampler_t *resampler_new(unsigned taps, unsigned interpolation, unsigned decimation, + double cutoff, double beta, unsigned max_input, int stereo) +{ + resampler_t *rs = NULL; + + if (taps == 0 || interpolation == 0 || decimation == 0 || max_input == 0) + return NULL; /* invalid parameters */ + + rs = (resampler_t*)calloc(1, sizeof(*rs)); + if (!rs) + return NULL; /* out of memory */ + + /* :cutoff is relative to the decimated frequency, but filtering is taking + * place at the interpolated frequency. It needs to be adapted if resampled + * rate is lower. Also needs more taps to keep the transition band width */ + if (decimation > interpolation) { + cutoff = cutoff * interpolation/decimation; + taps = taps * decimation/interpolation; + } + + rs->interpolation = interpolation; + rs->decimation = decimation; + rs->taps = taps; + /* optimizers for resampler_update: */ + rs->interp_inv = (1ULL<<32) / interpolation; + rs->ratio_int = decimation / interpolation; + + rs->filter = create_sinc(interpolation, taps, cutoff, beta); + if (!rs->filter) + goto error; + + rs->stereo = !!stereo; + rs->buffer_sz = (max_input * decimation/interpolation) + decimation + 1; + rs->buffer = calloc(1, rs->buffer_sz * (stereo ? 2:1) * sizeof(*rs->buffer)); + if (!rs->buffer) + goto error; + + return rs; + +error: + if (rs->filter) + free(rs->filter); + if (rs->buffer) + free(rs->buffer); + free(rs); + return NULL; +} + +/* Obtain :length resampled audio frames in :buffer. 
Use :get_samples to obtain + * the needed amount of input samples */ +void resampler_update(resampler_t *rs, s32 *buffer, int length, + void (*get_samples)(s32 *buffer, int length, int stereo)) +{ + s16 *u; + s32 *p, *q = buffer; + int spf = rs->stereo; + s32 inlen; + s32 l, r; + int n, i; + + if (length <= 0) return; + + /* compute samples needed on input side: + * inlen = (length*decimation + interpolation-phase) / interpolation */ + n = length*rs->decimation + rs->interpolation-rs->phase; + inlen = ((u64)n * rs->interp_inv) >> 32; /* input samples, n/interpolation */ + if (n - inlen * rs->interpolation > rs->interpolation) inlen++; /* rounding */ + + /* reset buffer to start if the input doesn't fit into the buffer */ + if (rs->buffer_idx + inlen+rs->taps >= rs->buffer_sz) { + memcpy(rs->buffer, rs->buffer + (rs->buffer_idx<taps<buffer)); + rs->buffer_idx = 0; + } + p = rs->buffer + (rs->buffer_idx< 0) + get_samples(p + (rs->taps<stereo); + + if (rs->stereo) { + while (--length >= 0) { + /* compute filter output */ + s32 *h = p; + u = rs->filter + (rs->phase * rs->taps); + for (i = rs->taps-1, l = r = 0; i > 0; i -= 2) + { n = *u++; l += n * *h++; r += n * *h++; + n = *u++; l += n * *h++; r += n * *h++; } + if (i == 0) + { n = *u++; l += n * *h++; r += n * *h++; } + *q++ = l >> 15, *q++ = r >> 15; + /* advance position to next sample */ + rs->phase -= rs->decimation; +// if (rs->ratio_int) { + rs->phase += rs->ratio_int*rs->interpolation, + p += 2*rs->ratio_int, rs->buffer_idx += rs->ratio_int; +// } + if (rs->phase < 0) + { rs->phase += rs->interpolation, p += 2, rs->buffer_idx ++; } + } + } else { + while (--length >= 0) { + /* compute filter output */ + s32 *h = p; + u = rs->filter + (rs->phase * rs->taps); + for (i = rs->taps-1, l = r = 0; i > 0; i -= 2) + { n = *u++; l += n * *h++; + n = *u++; l += n * *h++; } + if (i == 0) + { n = *u++; l += n * *h++; } + *q++ = l >> 15; + /* advance position to next sample */ + rs->phase -= rs->decimation; +// if 
(rs->ratio_int) { + rs->phase += rs->ratio_int*rs->interpolation, + p += rs->ratio_int, rs->buffer_idx += rs->ratio_int; +// } + if (rs->phase < 0) + { rs->phase += rs->interpolation, p += 1, rs->buffer_idx ++; } + } + } +} diff --git a/pico/sound/resampler.h b/pico/sound/resampler.h new file mode 100644 index 00000000..b02de158 --- /dev/null +++ b/pico/sound/resampler.h @@ -0,0 +1,44 @@ +/* Configurable fixed point resampling SINC filter for mono and stereo audio. + * + * (C) 2022 irixxxx + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - MAME license. + * See COPYING file in the top-level directory. + */ + +struct resampler { + int stereo; // mono or stereo? + int taps; // taps to compute per output sample + int interpolation; // upsampling factor (numerator) + int decimation; // downsampling factor (denominator) + int ratio_int; // floor(decimation/interpolation) + u32 interp_inv; // Q16, 1.0/interpolation + s16 *filter; // filter taps + s32 *buffer; // filter history and input buffer (w/o zero stuffing) + int buffer_sz; // buffer size in frames + int buffer_idx; // buffer offset + int phase; // filter phase for last output sample +}; +typedef struct resampler resampler_t; + + +/* Release a resampler */ +void resampler_free(resampler_t *r); +/* Create a resampler with upsampling factor :interpolation and downsampling + * factor :decimation, Kaiser windowed SINC polyphase FIR with bank size :taps. + * The created filter has a size of :taps*:interpolation for upsampling and + * :taps*:decimation for downsampling. :taps is limiting the cost per sample and + * should be big enough to avoid inaccuracy (>= 8, higher is more accurate). + * :cutoff is in [0..1] with 1 representing the Nyquist rate after decimation. + * :beta is the Kaiser window beta. 
+ * :max_input is the maximum length in a resampler_update call */ +resampler_t *resampler_new(unsigned taps, unsigned interpolation, unsigned decimation, + double cutoff, double beta, unsigned max_input, int stereo); +/* Obtain :length resampled audio frames in :buffer. Use :get_samples to obtain + * the needed amount of input samples */ +void resampler_update(resampler_t *r, s32 *buffer, int length, + void (*generate_samples)(s32 *buffer, int length, int stereo)); + diff --git a/pico/sound/sn76496.c b/pico/sound/sn76496.c index 8474c703..1624d136 100644 --- a/pico/sound/sn76496.c +++ b/pico/sound/sn76496.c @@ -22,7 +22,7 @@ #include "sn76496.h" -#define MAX_OUTPUT 0x47ff // was 0x7fff +#define MAX_OUTPUT 0x4800 // was 0x7fff #define STEP 0x10000 @@ -31,22 +31,15 @@ /* bit0 = output */ /* noise feedback for white noise mode (verified on real SN76489 by John Kortink) */ -#define FB_WNOISE 0x14002 /* (16bits) bit16 = bit0(out) ^ bit2 ^ bit15 */ +#define FB_WNOISE_T 0x3000 /* (15bits) bit15 = bit1 ^ bit2, TI */ +#define FB_WNOISE_S 0x9000 /* (16bits) bit16 = bit0 ^ bit3, Sega PSG */ /* noise feedback for periodic noise mode */ -//#define FB_PNOISE 0x10000 /* 16bit rorate */ -#define FB_PNOISE 0x08000 /* JH 981127 - fixes Do Run Run */ +#define FB_PNOISE_T 0x4000 /* 15bit rotate for TI */ +#define FB_PNOISE_S 0x8000 /* 16bit rotate for Sega PSG */ -/* -0x08000 is definitely wrong. The Master System conversion of Marble Madness -uses periodic noise as a baseline. With a 15-bit rotate, the bassline is -out of tune. -The 16-bit rotate has been confirmed against a real PAL Sega Master System 2. -Hope that helps the System E stuff, more news on the PSG as and when! 
-*/ - -/* noise generator start preset (for periodic noise) */ -#define NG_PRESET 0x0f35 +#define FB_WNOISE FB_WNOISE_S /* Sega */ +#define FB_PNOISE FB_PNOISE_S struct SN76496 @@ -58,111 +51,70 @@ struct SN76496 int Register[8]; /* registers */ int LastRegister; /* last register written */ int Volume[4]; /* volume of voice 0-2 and noise */ - unsigned int RNG; /* noise generator */ + unsigned int RNG; /* noise generator */ int NoiseFB; /* noise feedback mask */ int Period[4]; int Count[4]; int Output[4]; - int pad[1]; + int Panning; }; static struct SN76496 ono_sn; // one and only SN76496 -int *sn76496_regs; +int *sn76496_regs = ono_sn.Register; //static void SN76496Write(int data) { struct SN76496 *R = &ono_sn; - int n; - + int n, r, c; /* update the output buffer before changing the registers */ //stream_update(R->Channel,0); + r = R->LastRegister; if (data & 0x80) - { - int r = (data & 0x70) >> 4; - int c = r/2; + r = R->LastRegister = (data & 0x70) >> 4; + c = r / 2; - R->LastRegister = r; - R->Register[r] = (R->Register[r] & 0x3f0) | (data & 0x0f); - switch (r) - { - case 0: /* tone 0 : frequency */ - case 2: /* tone 1 : frequency */ - case 4: /* tone 2 : frequency */ - R->Period[c] = R->UpdateStep * R->Register[r]; - if (R->Period[c] == 0) R->Period[c] = R->UpdateStep; - if (r == 4) - { - /* update noise shift frequency */ - if ((R->Register[6] & 0x03) == 0x03) - R->Period[3] = 2 * R->Period[2]; - } - break; - case 1: /* tone 0 : volume */ - case 3: /* tone 1 : volume */ - case 5: /* tone 2 : volume */ - case 7: /* noise : volume */ - R->Volume[c] = R->VolTable[data & 0x0f]; - break; - case 6: /* noise : frequency, mode */ - { - int n = R->Register[6]; - R->NoiseFB = (n & 4) ? FB_WNOISE : FB_PNOISE; - n &= 3; - /* N/512,N/1024,N/2048,Tone #3 output */ - R->Period[3] = ((n&3) == 3) ? 
2 * R->Period[2] : (R->UpdateStep << (5+(n&3))); - - /* reset noise shifter */ - R->RNG = NG_PRESET; - R->Output[3] = R->RNG & 1; - } - break; - } - } + if (!(data & 0x80) && (r == 0 || r == 2 || r == 4)) + // data byte (tone only) + R->Register[r] = (R->Register[r] & 0x0f) | ((data & 0x3f) << 4); else + R->Register[r] = (R->Register[r] & 0x3f0) | (data & 0x0f); + + data = R->Register[r]; + switch (r) { - int r = R->LastRegister; - int c = r/2; + case 0: /* tone 0 : frequency */ + case 2: /* tone 1 : frequency */ + case 4: /* tone 2 : frequency */ + R->Period[c] = R->UpdateStep * data; + if (R->Period[c] == 0) R->Period[c] = R->UpdateStep; + if (R->Count[c] > R->Period[c]) R->Count[c] = R->Period[c]; + if (r == 4) + { + /* update noise shift frequency */ + if ((R->Register[6] & 0x03) == 0x03) + R->Period[3] = 2 * R->Period[2]; + } + break; + case 1: /* tone 0 : volume */ + case 3: /* tone 1 : volume */ + case 5: /* tone 2 : volume */ + case 7: /* noise : volume */ + R->Volume[c] = R->VolTable[data & 0x0f]; + break; + case 6: /* noise : frequency, mode */ + n = data; + R->NoiseFB = (n & 4) ? FB_WNOISE : FB_PNOISE; + n &= 3; + /* N/512,N/1024,N/2048,Tone #3 output */ + R->Period[3] = 2 * (n == 3 ? 
R->Period[2] : R->UpdateStep << (4 + n)); - switch (r) - { - case 0: /* tone 0 : frequency */ - case 2: /* tone 1 : frequency */ - case 4: /* tone 2 : frequency */ - R->Register[r] = (R->Register[r] & 0x0f) | ((data & 0x3f) << 4); - R->Period[c] = R->UpdateStep * R->Register[r]; - if (R->Period[c] == 0) R->Period[c] = R->UpdateStep; - if (r == 4) - { - /* update noise shift frequency */ - if ((R->Register[6] & 0x03) == 0x03) - R->Period[3] = 2 * R->Period[2]; - } - break; - case 1: /* tone 0 : volume */ - case 3: /* tone 1 : volume */ - case 5: /* tone 2 : volume */ - case 7: /* noise : volume */ - R->Volume[c] = R->VolTable[data & 0x0f]; - R->Register[r] = (R->Register[r] & 0x3f0) | (data & 0x0f); - break; - case 6: /* noise : frequency, mode */ - { - R->Register[r] = (R->Register[r] & 0x3f0) | (data & 0x0f); - n = R->Register[6]; - R->NoiseFB = (n & 4) ? FB_WNOISE : FB_PNOISE; - n &= 3; - /* N/512,N/1024,N/2048,Tone #3 output */ - R->Period[3] = ((n&3) == 3) ? 2 * R->Period[2] : (R->UpdateStep << (5+(n&3))); - - /* reset noise shifter */ - R->RNG = NG_PRESET; - R->Output[3] = R->RNG & 1; - } - break; - } + /* reset noise shifter */ + R->RNG = FB_PNOISE; + R->Output[3] = R->RNG & 1; + break; } } @@ -180,22 +132,9 @@ void SN76496Update(short *buffer, int length, int stereo) int i; struct SN76496 *R = &ono_sn; - /* If the volume is 0, increase the counter */ - for (i = 0;i < 4;i++) - { - if (R->Volume[i] == 0) - { - /* note that I do count += length, NOT count = length + 1. You might think */ - /* it's the same since the volume is 0, but doing the latter could cause */ - /* interferencies when the program is rapidly modulating the volume. 
*/ - if (R->Count[i] <= length*STEP) R->Count[i] += length*STEP; - } - } - while (length > 0) { int vol[4]; - unsigned int out; int left; @@ -215,10 +154,17 @@ void SN76496Update(short *buffer, int length, int stereo) /* If we exit the loop in the middle, Output[i] has to be inverted */ /* and vol[i] incremented only if the exit status of the square */ /* wave is 1. */ - while (R->Count[i] <= 0) + if (R->Count[i] < -2*R->Period[i] || R->Volume[i] == 0) { + /* Cut off anything above the Nyquist frequency. */ + /* It will only create aliasing anyway. This is actually an */ + /* ideal lowpass filter with Nyquist corner frequency. */ + vol[i] += STEP/2; // mean value + R->Count[i] = R->Output[i] = 0; + } + while (R->Count[i] < 0) { R->Count[i] += R->Period[i]; - if (R->Count[i] > 0) + if (R->Count[i] >= 0) { R->Output[i] ^= 1; if (R->Output[i]) vol[i] += R->Period[i]; @@ -231,6 +177,7 @@ void SN76496Update(short *buffer, int length, int stereo) } left = STEP; + if (R->Output[3]) vol[3] += R->Count[3]; do { int nextevent; @@ -238,35 +185,60 @@ void SN76496Update(short *buffer, int length, int stereo) if (R->Count[3] < left) nextevent = R->Count[3]; else nextevent = left; - if (R->Output[3]) vol[3] += R->Count[3]; R->Count[3] -= nextevent; if (R->Count[3] <= 0) { - if (R->RNG & 1) R->RNG ^= R->NoiseFB; - R->RNG >>= 1; R->Output[3] = R->RNG & 1; + R->RNG >>= 1; + if (R->Output[3]) + { + R->RNG ^= R->NoiseFB; + vol[3] += R->Period[3]; + } R->Count[3] += R->Period[3]; - if (R->Output[3]) vol[3] += R->Period[3]; } - if (R->Output[3]) vol[3] -= R->Count[3]; left -= nextevent; - } while (left > 0); - - out = vol[0] * R->Volume[0] + vol[1] * R->Volume[1] + - vol[2] * R->Volume[2] + vol[3] * R->Volume[3]; - - if (out > MAX_OUTPUT * STEP) out = MAX_OUTPUT * STEP; - - if ((out /= STEP)) // will be optimized to shift; max 0x47ff = 18431 - *buffer += out; - if(stereo) buffer+=2; // only left for stereo, to be mixed to right later - else buffer++; + } while (left > 0 && 
R->Volume[3]); + if (R->Output[3]) vol[3] -= R->Count[3]; length--; + if (R->Panning == 0xff || !stereo) { + unsigned int out = + vol[0] * R->Volume[0] + vol[1] * R->Volume[1] + + vol[2] * R->Volume[2] + vol[3] * R->Volume[3]; + + if (out > MAX_OUTPUT * STEP) out = MAX_OUTPUT * STEP; + + out /= STEP; // will be optimized to shift; max 0x4800 = 18432 + *buffer++ += out; + if (stereo) *buffer++ += out; + } else { +#define P(n) !!(R->Panning & (1<<(n))) + unsigned int outl = + vol[0] * R->Volume[0] * P(4) + vol[1] * R->Volume[1] * P(5) + + vol[2] * R->Volume[2] * P(6) + vol[3] * R->Volume[3] * P(7); + unsigned int outr = + vol[0] * R->Volume[0] * P(0) + vol[1] * R->Volume[1] * P(1) + + vol[2] * R->Volume[2] * P(2) + vol[3] * R->Volume[3] * P(3); +#undef P + if (outl > MAX_OUTPUT * STEP) outl = MAX_OUTPUT * STEP; + if (outr > MAX_OUTPUT * STEP) outr = MAX_OUTPUT * STEP; + + outl /= STEP; // will be optimized to shift; max 0x4800 = 18432 + outr /= STEP; // will be optimized to shift; max 0x4800 = 18432 + *buffer++ += outl; + *buffer++ += outr; + } } } +void SN76496Config(int panning) +{ + struct SN76496 *R = &ono_sn; + R->Panning = panning & 0xff; +} + static void SN76496_set_clock(struct SN76496 *R,int clock) { @@ -290,7 +262,7 @@ static void SN76496_set_gain(struct SN76496 *R,int gain) gain &= 0xff; /* increase max output basing on gain (0.2 dB per step) */ - out = MAX_OUTPUT / 3; + out = MAX_OUTPUT / 4.0; while (gain-- > 0) out *= 1.023292992; /* = (10 ^ (0.2/20)) */ @@ -298,7 +270,7 @@ static void SN76496_set_gain(struct SN76496 *R,int gain) for (i = 0;i < 15;i++) { /* limit volume to avoid clipping */ - if (out > MAX_OUTPUT / 3) R->VolTable[i] = MAX_OUTPUT / 3; + if (out > MAX_OUTPUT / 4) R->VolTable[i] = MAX_OUTPUT / 4; else R->VolTable[i] = out; out /= 1.258925412; /* = 10 ^ (2/20) = 2dB */ @@ -307,6 +279,15 @@ static void SN76496_set_gain(struct SN76496 *R,int gain) } +//static +void SN76496_set_clockrate(int clock,int sample_rate) +{ + struct SN76496 *R = 
&ono_sn; + + R->SampleRate = sample_rate; + SN76496_set_clock(R,clock); +} + //static int SN76496_init(int clock,int sample_rate) { @@ -314,10 +295,8 @@ int SN76496_init(int clock,int sample_rate) int i; //R->Channel = stream_create(0,1, sample_rate,R,SN76496Update); - sn76496_regs = R->Register; - R->SampleRate = sample_rate; - SN76496_set_clock(R,clock); + SN76496_set_clockrate(clock,sample_rate); for (i = 0;i < 4;i++) R->Volume[i] = 0; @@ -330,14 +309,15 @@ int SN76496_init(int clock,int sample_rate) for (i = 0;i < 4;i++) { - R->Output[i] = 0; - R->Period[i] = R->Count[i] = R->UpdateStep; + R->Volume[i] = R->Output[i] = R->Count[i] = 0; + R->Period[i] = R->UpdateStep; } - R->RNG = NG_PRESET; + R->RNG = FB_PNOISE; R->Output[3] = R->RNG & 1; // added SN76496_set_gain(R, 0); + R->Panning = 0xff; return 0; } diff --git a/pico/sound/sn76496.h b/pico/sound/sn76496.h index e0de6eda..8677ea93 100644 --- a/pico/sound/sn76496.h +++ b/pico/sound/sn76496.h @@ -3,6 +3,8 @@ void SN76496Write(int data); void SN76496Update(short *buffer,int length,int stereo); +void SN76496Config(int panning); +void SN76496_set_clockrate(int clock,int sample_rate); int SN76496_init(int clock,int sample_rate); #endif diff --git a/pico/sound/sound.c b/pico/sound/sound.c index ec0e2059..74f9967b 100644 --- a/pico/sound/sound.c +++ b/pico/sound/sound.c @@ -2,110 +2,85 @@ * PicoDrive * (c) Copyright Dave, 2004 * (C) notaz, 2006-2009 + * (C) irixxxx, 2019-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
*/ #include +#include "../pico_int.h" #include "ym2612.h" #include "sn76496.h" -#include "../pico_int.h" -#include "../cd/cue.h" +#include "emu2413/emu2413.h" +#include "resampler.h" #include "mix.h" -#define SIMPLE_WRITE_SOUND 0 +#define YM2612_CH6PAN 0x1b6 // panning register for channel 6 (used for DAC) -void (*PsndMix_32_to_16l)(short *dest, int *src, int count) = mix_32_to_16l_stereo; +void (*PsndMix_32_to_16)(s16 *dest, s32 *src, int count) = mix_32_to_16_stereo; // master int buffer to mix to -static int PsndBuffer[2*(44100+100)/50]; - -// dac -static unsigned short dac_info[312+4]; // pppppppp ppppllll, p - pos in buff, l - length to write for this sample +// +1 for a fill triggered by an instruction overhanging into the next scanline +static s32 PsndBuffer[2*(54000+100)/50+2]; // cdda output buffer -short cdda_out_buffer[2*1152]; - -// for Pico -int PsndRate=0; -int PsndLen=0; // number of mono samples, multiply by 2 for stereo -int PsndLen_exc_add=0; // this is for non-integer sample counts per line, eg. 22050/60 -int PsndLen_exc_cnt=0; -int PsndDacLine=0; -short *PsndOut=NULL; // PCM data buffer - -// timers -int timer_a_next_oflow, timer_a_step; // in z80 cycles -int timer_b_next_oflow, timer_b_step; +s16 cdda_out_buffer[2*1152]; // sn76496 extern int *sn76496_regs; +// FM resampling polyphase FIR +static resampler_t *fmresampler; +static int (*PsndFMUpdate)(s32 *buffer, int length, int stereo, int is_buf_empty); -static void dac_recalculate(void) -{ - int i, dac_cnt, pos, len, lines = Pico.m.pal ? 312 : 262, mid = Pico.m.pal ? 
68 : 93; +// ym2413 +static OPLL *opll = NULL; +static struct { + uint32_t adr; + uint8_t reg[sizeof(opll->reg)]; +} opll_buf; - if (PsndLen <= lines) - { - // shrinking algo - dac_cnt = -PsndLen; - len=1; pos=0; - dac_info[225] = 1; - for(i=226; i != 225; i++) - { - if (i >= lines) i = 0; - len = 0; - if(dac_cnt < 0) { - len=1; - pos++; - dac_cnt += lines; - } - dac_cnt -= PsndLen; - dac_info[i] = (pos<<4)|len; - } - } - else - { - // stretching - dac_cnt = PsndLen; - pos=0; - for(i = 225; i != 224; i++) - { - if (i >= lines) i = 0; - len=0; - while(dac_cnt >= 0) { - dac_cnt -= lines; - len++; - } - if (i == mid) // midpoint - while(pos+len < PsndLen/2) { - dac_cnt -= lines; - len++; - } - dac_cnt += PsndLen; - dac_info[i] = (pos<<4)|len; - pos+=len; - } - // last sample - for(len = 0, i = pos; i < PsndLen; i++) len++; - if (PsndLen_exc_add) len++; - dac_info[224] = (pos<<4)|len; - } - mid = (dac_info[lines-1] & 0xfff0) + ((dac_info[lines-1] & 0xf) << 4); - for (i = lines; i < sizeof(dac_info) / sizeof(dac_info[0]); i++) - dac_info[i] = mid; - //for(i=len=0; i < lines; i++) { - // printf("%03i : %03i : %i\n", i, dac_info[i]>>4, dac_info[i]&0xf); - // len+=dac_info[i]&0xf; - //} - //printf("rate is %i, len %f\n", PsndRate, (double)PsndRate/(Pico.m.pal ? 
50.0 : 60.0)); - //printf("len total: %i, last pos: %i\n", len, pos); - //exit(8); +void YM2413_regWrite(unsigned data){ + OPLL_writeIO(opll,0,data); +} +void YM2413_dataWrite(unsigned data){ + OPLL_writeIO(opll,1,data); } +PICO_INTERNAL void *YM2413GetRegs(void) +{ + memcpy(opll_buf.reg, opll->reg, sizeof(opll->reg)); + opll_buf.adr = opll->adr; + return &opll_buf; +} + +PICO_INTERNAL void YM2413UnpackState(void) +{ + int i; + + for (i = sizeof(opll->reg)-1; i >= 0; i--) { + OPLL_writeIO(opll, 0, i); + OPLL_writeIO(opll, 1, opll_buf.reg[i]); + } + opll->adr = opll_buf.adr; +} + +PICO_INTERNAL void PsndInit(void) +{ + opll = OPLL_new(OSC_NTSC/15, OSC_NTSC/15/72); + OPLL_setChipType(opll,0); + OPLL_reset(opll); +} + +PICO_INTERNAL void PsndExit(void) +{ + OPLL_delete(opll); + opll = NULL; + + resampler_free(fmresampler); fmresampler = NULL; +} PICO_INTERNAL void PsndReset(void) { @@ -114,314 +89,558 @@ PICO_INTERNAL void PsndReset(void) timers_reset(); } +// FM polyphase FIR resampling +#define FMFIR_TAPS 9 + +// resample FM from its native 53267Hz/52781Hz with polyphase FIR filter +static int ymchans; +static void YM2612Update(s32 *buffer, int length, int stereo) +{ + ymchans = YM2612UpdateOne(buffer, length, stereo, 1); +} + +static int YM2612UpdateFIR(s32 *buffer, int length, int stereo, int is_buf_empty) +{ + resampler_update(fmresampler, buffer, length, YM2612Update); + return ymchans; +} + +// resample SMS FM from its native 49716Hz/49262Hz with polyphase FIR filter +static void YM2413Update(s32 *buffer, int length, int stereo) +{ + while (length-- > 0) { + int16_t getdata = OPLL_calc(opll) * 3; + *buffer++ = getdata; + buffer += stereo; // only left for stereo, to be mixed to right later + } +} + +static int YM2413UpdateFIR(s32 *buffer, int length, int stereo, int is_buf_empty) +{ + if (!is_buf_empty) memset(buffer, 0, (length << stereo) * sizeof(*buffer)); + resampler_update(fmresampler, buffer, length, YM2413Update); + return 0; +} + +// FIR setup, looks 
for a close enough rational number matching the ratio +static void YMFM_setup_FIR(int inrate, int outrate, int stereo) +{ + int mindiff = 999; + int diff, mul, div; + int minmult = 22, maxmult = 55; // min,max interpolation factor + + // compute filter ratio with largest multiplier for smallest error + for (mul = minmult; mul <= maxmult; mul++) { + div = (inrate*mul + outrate/2) / outrate; + diff = outrate*div/mul - inrate; + if (abs(diff) < abs(mindiff)) { + mindiff = diff; + Pico.snd.fm_fir_mul = mul; + Pico.snd.fm_fir_div = div; + if (abs(mindiff) <= inrate/1000+1) break; // below error limit + } + } + printf("FM polyphase FIR ratio=%d/%d error=%.3f%%\n", + Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div, 100.0*mindiff/inrate); + + resampler_free(fmresampler); + fmresampler = resampler_new(FMFIR_TAPS, Pico.snd.fm_fir_mul, Pico.snd.fm_fir_div, + 0.85, 2, 2*inrate/50, stereo); +} + +// wrapper for the YM2612UpdateONE macro +static int YM2612UpdateONE(s32 *buffer, int length, int stereo, int is_buf_empty) +{ + return YM2612UpdateOne(buffer, length, stereo, is_buf_empty); +} + +static int ymclock; +static int ymrate; +static int ymopts; // to be called after changing sound rate or chips void PsndRerate(int preserve_state) { void *state = NULL; int target_fps = Pico.m.pal ? 50 : 60; + int target_lines = Pico.m.pal ? 313 : 262; + int sms_clock = Pico.m.pal ? OSC_PAL/15 : OSC_NTSC/15; + int ym2413_rate = (sms_clock + 36) / 72; + int ym2612_clock = Pico.m.pal ? 
OSC_PAL/7 : OSC_NTSC/7; + int ym2612_rate = YM2612_NATIVE_RATE(); + int ym2612_init = !preserve_state; - // not all rates are supported in MCD mode due to mp3 decoder limitations - if (PicoAHW & PAHW_MCD) { - if (!(11025-100 <= PsndRate && PsndRate <= 11025+100) && - !(22050-100 <= PsndRate && PsndRate <= 22050+100) && - !(44100-100 <= PsndRate && PsndRate <= 44100+100)) - PsndRate = 22050; - PicoOpt |= POPT_EN_STEREO; // force stereo - } + // don't init YM2612 if preserve_state and no parameter changes + ym2612_init |= ymclock != ym2612_clock || ymopts != (PicoIn.opt & (POPT_DIS_FM_SSGEG|POPT_EN_FM_DAC)); + ym2612_init |= ymrate != (PicoIn.opt & POPT_EN_FM_FILTER ? ym2612_rate : PicoIn.sndRate); + ymclock = ym2612_clock; + ymrate = (PicoIn.opt & POPT_EN_FM_FILTER ? ym2612_rate : PicoIn.sndRate); + ymopts = PicoIn.opt & (POPT_DIS_FM_SSGEG|POPT_EN_FM_DAC); - if (preserve_state) { + if (preserve_state && ym2612_init) { state = malloc(0x204); if (state == NULL) return; ym2612_pack_state(); memcpy(state, YM2612GetRegs(), 0x204); } - YM2612Init(Pico.m.pal ? OSC_PAL/7 : OSC_NTSC/7, PsndRate); - if (preserve_state) { + + if (PicoIn.AHW & PAHW_SMS) { + OPLL_setRate(opll, ym2413_rate); + if (!preserve_state) + OPLL_reset(opll); + YMFM_setup_FIR(ym2413_rate, PicoIn.sndRate, 0); + PsndFMUpdate = YM2413UpdateFIR; + } else if ((PicoIn.opt & POPT_EN_FM_FILTER) && ym2612_rate != PicoIn.sndRate) { + // polyphase FIR resampler, resampling directly from native to output rate + if (ym2612_init) + YM2612Init(ym2612_clock, ym2612_rate, + ((PicoIn.opt&POPT_DIS_FM_SSGEG) ? 0 : ST_SSG) | + ((PicoIn.opt&POPT_EN_FM_DAC) ? ST_DAC : 0)); + YMFM_setup_FIR(ym2612_rate, PicoIn.sndRate, PicoIn.opt & POPT_EN_STEREO); + PsndFMUpdate = YM2612UpdateFIR; + } else { + if (ym2612_init) + YM2612Init(ym2612_clock, PicoIn.sndRate, + ((PicoIn.opt&POPT_DIS_FM_SSGEG) ? 0 : ST_SSG) | + ((PicoIn.opt&POPT_EN_FM_DAC) ? 
ST_DAC : 0)); + PsndFMUpdate = YM2612UpdateONE; + } + + if (preserve_state && ym2612_init) { // feed it back it's own registers, just like after loading state memcpy(YM2612GetRegs(), state, 0x204); ym2612_unpack_state(); - if ((PicoAHW & PAHW_MCD) && !(Pico_mcd->s68k_regs[0x36] & 1) && (Pico_mcd->scd.Status_CDC & 1)) - cdda_start_play(); + free(state); } - if (preserve_state) memcpy(state, sn76496_regs, 28*4); // remember old state - SN76496_init(Pico.m.pal ? OSC_PAL/15 : OSC_NTSC/15, PsndRate); - if (preserve_state) memcpy(sn76496_regs, state, 28*4); // restore old state + if (preserve_state) + SN76496_set_clockrate(Pico.m.pal ? OSC_PAL/15 : OSC_NTSC/15, PicoIn.sndRate); + else + SN76496_init(Pico.m.pal ? OSC_PAL/15 : OSC_NTSC/15, PicoIn.sndRate); - if (state) - free(state); + // calculate Pico.snd.len + Pico.snd.len = PicoIn.sndRate / target_fps; + Pico.snd.len_e_add = ((PicoIn.sndRate - Pico.snd.len * target_fps) << 16) / target_fps; + Pico.snd.len_e_cnt = 0; // Q16 - // calculate PsndLen - PsndLen=PsndRate / target_fps; - PsndLen_exc_add=((PsndRate - PsndLen*target_fps)<<16) / target_fps; - PsndLen_exc_cnt=0; - - // recalculate dac info - dac_recalculate(); + // samples per line (Q16) + Pico.snd.smpl_mult = 65536LL * PicoIn.sndRate / (target_fps*target_lines); + // samples per z80 clock (Q20) + Pico.snd.clkl_mult = 16 * Pico.snd.smpl_mult * 15/7 / 488.5; + // samples per 44.1 KHz sample + Pico.snd.cdda_mult = 65536LL * 44100 / PicoIn.sndRate; + Pico.snd.cdda_div = 65536LL * PicoIn.sndRate / 44100; // clear all buffers memset32(PsndBuffer, 0, sizeof(PsndBuffer)/4); memset(cdda_out_buffer, 0, sizeof(cdda_out_buffer)); - if (PsndOut) + if (PicoIn.sndOut) PsndClear(); // set mixer - PsndMix_32_to_16l = (PicoOpt & POPT_EN_STEREO) ? mix_32_to_16l_stereo : mix_32_to_16_mono; + PsndMix_32_to_16 = (PicoIn.opt & POPT_EN_STEREO) ? mix_32_to_16_stereo : mix_32_to_16_mono; + mix_reset(PicoIn.opt & POPT_EN_SNDFILTER ? 
PicoIn.sndFilterAlpha : 0); - if (PicoAHW & PAHW_PICO) + if (PicoIn.AHW & PAHW_PICO) PicoReratePico(); } -PICO_INTERNAL void PsndDoDAC(int line_to) +PICO_INTERNAL void PsndStartFrame(void) { - int pos, pos1, len; - int dout = ym2612.dacout; - int line_from = PsndDacLine; - - if (line_to >= 312) - line_to = 311; - - PsndDacLine = line_to + 1; - - pos =dac_info[line_from]>>4; - pos1=dac_info[line_to]; - len = ((pos1>>4)-pos) + (pos1&0xf); - if (!len) return; - - if (PicoOpt & POPT_EN_STEREO) { - short *d = PsndOut + pos*2; - for (; len > 0; len--, d+=2) *d = dout; - } else { - short *d = PsndOut + pos; - for (; len > 0; len--, d++) *d = dout; + // compensate for float part of Pico.snd.len + Pico.snd.len_use = Pico.snd.len; + Pico.snd.len_e_cnt += Pico.snd.len_e_add; + if (Pico.snd.len_e_cnt >= 0x10000) { + Pico.snd.len_e_cnt -= 0x10000; + Pico.snd.len_use++; } } -// cdda -static pm_file *cdda_stream = NULL; - -static void cdda_raw_update(int *buffer, int length) +PICO_INTERNAL void PsndDoDAC(int cyc_to) { - int ret, cdda_bytes, mult = 1; - if (cdda_stream == NULL) + int pos, len; + int dout = ym2612.dacout; + + // nothing to do if sound is off + if (!PicoIn.sndOut) return; + + // number of samples to fill in buffer (Q20) + len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.dac_pos; + + // update position and calculate buffer offset and length + pos = (Pico.snd.dac_pos+0x80000) >> 20; + Pico.snd.dac_pos += len; + len = ((Pico.snd.dac_pos+0x80000) >> 20) - pos; + + // avoid loss of the 1st sample of a new block (Q rounding issues) + if (pos+len == 0) + len = 1, Pico.snd.dac_pos += 0x80000; + if (len <= 0) return; - cdda_bytes = length*4; - if (PsndRate <= 22050 + 100) mult = 2; - if (PsndRate < 22050 - 100) mult = 4; - cdda_bytes *= mult; + // fill buffer, applying a rather weak order 1 bessel IIR on the way + // y[n] = (x[n] + x[n-1])*(1/2) (3dB cutoff at 11025 Hz, no gain) + // 1 sample delay for correct IIR filtering over audio frame boundaries + if (PicoIn.opt & 
POPT_EN_STEREO) { + s16 *d = PicoIn.sndOut + pos*2; + int pan = ym2612.REGS[YM2612_CH6PAN]; + int l = pan & 0x80 ? Pico.snd.dac_val : 0; + int r = pan & 0x40 ? Pico.snd.dac_val : 0; + *d++ += pan & 0x80 ? Pico.snd.dac_val2 : 0; + *d++ += pan & 0x40 ? Pico.snd.dac_val2 : 0; + while (--len) *d++ += l, *d++ += r; + } else { + s16 *d = PicoIn.sndOut + pos; + *d++ += Pico.snd.dac_val2; + while (--len) *d++ += Pico.snd.dac_val; + } + Pico.snd.dac_val2 = (Pico.snd.dac_val + dout) >> 1; + Pico.snd.dac_val = dout; +} - ret = pm_read(cdda_out_buffer, cdda_bytes, cdda_stream); +PICO_INTERNAL void PsndDoPSG(int cyc_to) +{ + int pos, len; + int stereo = 0; + + // nothing to do if sound is off + if (!PicoIn.sndOut) return; + + // number of samples to fill in buffer (Q20) + len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.psg_pos; + + // update position and calculate buffer offset and length + pos = (Pico.snd.psg_pos+0x80000) >> 20; + Pico.snd.psg_pos += len; + len = ((Pico.snd.psg_pos+0x80000) >> 20) - pos; + + if (len <= 0) + return; + if (!(PicoIn.opt & POPT_EN_PSG)) + return; + + if (PicoIn.opt & POPT_EN_STEREO) { + stereo = 1; + pos <<= 1; + } + SN76496Update(PicoIn.sndOut + pos, len, stereo); +} + +PICO_INTERNAL void PsndDoSMSFM(int cyc_to) +{ + int pos, len; + int stereo = 0; + s32 *buf32 = PsndBuffer; + s16 *buf = PicoIn.sndOut; + + // nothing to do if sound is off + if (!PicoIn.sndOut) return; + + // number of samples to fill in buffer (Q20) + len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.ym2413_pos; + + // update position and calculate buffer offset and length + pos = (Pico.snd.ym2413_pos+0x80000) >> 20; + Pico.snd.ym2413_pos += len; + len = ((Pico.snd.ym2413_pos+0x80000) >> 20) - pos; + + if (len <= 0) + return; + if (!(PicoIn.opt & POPT_EN_YM2413)) + return; + + if (PicoIn.opt & POPT_EN_STEREO) { + stereo = 1; + pos <<= 1; + } + + if (Pico.m.hardware & PMS_HW_FMUSED) { + buf += pos; + PsndFMUpdate(buf32, len, 0, 0); + if (stereo) + while (len--) { + *buf++ += *buf32; + 
*buf++ += *buf32++; + } + else + while (len--) { + *buf++ += *buf32++; + } + } +} + +PICO_INTERNAL void PsndDoFM(int cyc_to) +{ + int pos, len; + int stereo = 0; + + // nothing to do if sound is off + if (!PicoIn.sndOut) return; + + // Q20, number of samples since last call + len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.fm_pos; + + // update position and calculate buffer offset and length + pos = (Pico.snd.fm_pos+0x80000) >> 20; + Pico.snd.fm_pos += len; + len = ((Pico.snd.fm_pos+0x80000) >> 20) - pos; + if (len <= 0) + return; + + // fill buffer + if (PicoIn.opt & POPT_EN_STEREO) { + stereo = 1; + pos <<= 1; + } + if (PicoIn.opt & POPT_EN_FM) + PsndFMUpdate(PsndBuffer + pos, len, stereo, 1); +} + +PICO_INTERNAL void PsndDoPCM(int cyc_to) +{ + int pos, len; + int stereo = 0; + + // nothing to do if sound is off + if (!PicoIn.sndOut) return; + + // Q20, number of samples since last call + len = (cyc_to * Pico.snd.clkl_mult) - Pico.snd.pcm_pos; + + // update position and calculate buffer offset and length + pos = (Pico.snd.pcm_pos+0x80000) >> 20; + Pico.snd.pcm_pos += len; + len = ((Pico.snd.pcm_pos+0x80000) >> 20) - pos; + if (len <= 0) + return; + + // fill buffer + if (PicoIn.opt & POPT_EN_STEREO) { + stereo = 1; + pos <<= 1; + } + PicoPicoPCMUpdate(PicoIn.sndOut + pos, len, stereo); +} + +// cdda +static void cdda_raw_update(s32 *buffer, int length, int stereo) +{ + int ret, cdda_bytes; + + cdda_bytes = (length * Pico.snd.cdda_mult >> 16) * 4; + + ret = pm_read_audio(cdda_out_buffer, cdda_bytes, Pico_mcd->cdda_stream); if (ret < cdda_bytes) { memset((char *)cdda_out_buffer + ret, 0, cdda_bytes - ret); - cdda_stream = NULL; + Pico_mcd->cdda_stream = NULL; return; } // now mix - switch (mult) { - case 1: mix_16h_to_32(buffer, cdda_out_buffer, length*2); break; - case 2: mix_16h_to_32_s1(buffer, cdda_out_buffer, length*2); break; - case 4: mix_16h_to_32_s2(buffer, cdda_out_buffer, length*2); break; - } + if (stereo) switch (Pico.snd.cdda_mult) { + case 0x10000: 
mix_16h_to_32(buffer, cdda_out_buffer, length*2); break; + case 0x20000: mix_16h_to_32_s1(buffer, cdda_out_buffer, length*2); break; + case 0x40000: mix_16h_to_32_s2(buffer, cdda_out_buffer, length*2); break; + default: mix_16h_to_32_resample_stereo(buffer, cdda_out_buffer, length, Pico.snd.cdda_mult); + } else + mix_16h_to_32_resample_mono(buffer, cdda_out_buffer, length, Pico.snd.cdda_mult); } -PICO_INTERNAL void cdda_start_play(void) +void cdda_start_play(int lba_base, int lba_offset, int lb_len) { - int lba_offset, index, lba_length, i; - - elprintf(EL_STATUS, "cdda play track #%i", Pico_mcd->scd.Cur_Track); - - index = Pico_mcd->scd.Cur_Track - 1; - - lba_offset = Pico_mcd->scd.Cur_LBA - Track_to_LBA(index + 1); - if (lba_offset < 0) lba_offset = 0; - lba_offset += Pico_mcd->TOC.Tracks[index].Offset; - - // find the actual file for this track - for (i = index; i > 0; i--) - if (Pico_mcd->TOC.Tracks[i].F != NULL) break; - - if (Pico_mcd->TOC.Tracks[i].F == NULL) { - elprintf(EL_STATUS|EL_ANOMALY, "no track?!"); - return; - } - - if (Pico_mcd->TOC.Tracks[i].ftype == CT_MP3) + if (Pico_mcd->cdda_type == CT_MP3) { int pos1024 = 0; - lba_length = Pico_mcd->TOC.Tracks[i].Length; - for (i++; i < Pico_mcd->TOC.Last_Track; i++) { - if (Pico_mcd->TOC.Tracks[i].F != NULL) break; - lba_length += Pico_mcd->TOC.Tracks[i].Length; - } - if (lba_offset) - pos1024 = lba_offset * 1024 / lba_length; + pos1024 = lba_offset * 1024 / lb_len; - mp3_start_play(Pico_mcd->TOC.Tracks[index].F, pos1024); + mp3_start_play(Pico_mcd->cdda_stream, pos1024); return; } - cdda_stream = Pico_mcd->TOC.Tracks[i].F; - PicoCDBufferFlush(); // buffering relies on fp not being touched - pm_seek(cdda_stream, lba_offset * 2352, SEEK_SET); - if (Pico_mcd->TOC.Tracks[i].ftype == CT_WAV) + pm_seek(Pico_mcd->cdda_stream, (lba_base + lba_offset) * 2352, SEEK_SET); + if (Pico_mcd->cdda_type == CT_WAV) { // skip headers, assume it's 44kHz stereo uncompressed - pm_seek(cdda_stream, 44, SEEK_CUR); + 
pm_seek(Pico_mcd->cdda_stream, 44, SEEK_CUR); } } PICO_INTERNAL void PsndClear(void) { - int len = PsndLen; - if (PsndLen_exc_add) len++; - if (PicoOpt & POPT_EN_STEREO) - memset32((int *) PsndOut, 0, len); // assume PsndOut to be aligned + int len = Pico.snd.len; + if (Pico.snd.len_e_add) len++; + + // drop pos remainder to avoid rounding errors (not entirely correct though) + Pico.snd.dac_pos = Pico.snd.fm_pos = Pico.snd.psg_pos = Pico.snd.ym2413_pos = Pico.snd.pcm_pos = 0; + if (!PicoIn.sndOut) return; + + if (PicoIn.opt & POPT_EN_STEREO) + memset32((int *) PicoIn.sndOut, 0, len); // assume PicoIn.sndOut to be aligned else { - short *out = PsndOut; - if ((long)out & 2) { *out++ = 0; len--; } + s16 *out = PicoIn.sndOut; + if ((uintptr_t)out & 2) { *out++ = 0; len--; } memset32((int *) out, 0, len/2); if (len & 1) out[len-1] = 0; } + if (!(PicoIn.opt & POPT_EN_FM)) + memset32(PsndBuffer, 0, PicoIn.opt & POPT_EN_STEREO ? len*2 : len); } static int PsndRender(int offset, int length) { - int buf32_updated = 0; - int *buf32 = PsndBuffer+offset; - int stereo = (PicoOpt & 8) >> 3; + s32 *buf32; + int stereo = (PicoIn.opt & 8) >> 3; + int fmlen = ((Pico.snd.fm_pos+0x80000) >> 20); + int daclen = ((Pico.snd.dac_pos+0x80000) >> 20); + int psglen = ((Pico.snd.psg_pos+0x80000) >> 20); + int pcmlen = ((Pico.snd.pcm_pos+0x80000) >> 20); - offset <<= stereo; + buf32 = PsndBuffer+(offset<= 0x10000) { - PsndLen_exc_cnt -= 0x10000; - length++; - } + // Add in parts of the PSG output not yet done + if (length-psglen > 0 && PicoIn.sndOut) { + s16 *psgbuf = PicoIn.sndOut + (psglen << stereo); + Pico.snd.psg_pos += (length-psglen) << 20; + if (PicoIn.opt & POPT_EN_PSG) + SN76496Update(psgbuf, length-psglen, stereo); } -#endif - // PSG - if (PicoOpt & POPT_EN_PSG) - SN76496Update(PsndOut+offset, length, stereo); - - if (PicoAHW & PAHW_PICO) { - PicoPicoPCMUpdate(PsndOut+offset, length, stereo); + if (PicoIn.AHW & PAHW_PICO) { + // always need to render sound for interrupts + s16 *buf16 
= PicoIn.sndOut ? PicoIn.sndOut + (pcmlen< 0 && PicoIn.sndOut) { + Pico.snd.dac_pos += (length-daclen) << 20; + if (PicoIn.opt & POPT_EN_STEREO) { + s16 *d = PicoIn.sndOut + daclen*2; + int pan = ym2612.REGS[YM2612_CH6PAN]; + int l = pan & 0x80 ? Pico.snd.dac_val : 0; + int r = pan & 0x40 ? Pico.snd.dac_val : 0; + *d++ += pan & 0x80 ? Pico.snd.dac_val2 : 0; + *d++ += pan & 0x40 ? Pico.snd.dac_val2 : 0; + if (l|r) for (daclen++; length-daclen > 0; daclen++) + *d++ += l, *d++ += r; + } else { + s16 *d = PicoIn.sndOut + daclen; + *d++ += Pico.snd.dac_val2; + if (Pico.snd.dac_val) for (daclen++; length-daclen > 0; daclen++) + *d++ += Pico.snd.dac_val; + } + Pico.snd.dac_val2 = Pico.snd.dac_val; + } -//printf("active_chs: %02x\n", buf32_updated); - (void)buf32_updated; + // Add in parts of the FM buffer not yet done + if (length-fmlen > 0 && PicoIn.sndOut) { + s32 *fmbuf = buf32 + ((fmlen-offset) << stereo); + Pico.snd.fm_pos += (length-fmlen) << 20; + if (PicoIn.opt & POPT_EN_FM) + PsndFMUpdate(fmbuf, length-fmlen, stereo, 1); + } // CD: PCM sound - if (PicoAHW & PAHW_MCD) { - pcd_pcm_update(buf32, length, stereo); - //buf32_updated = 1; + if (PicoIn.AHW & PAHW_MCD) { + pcd_pcm_update(buf32, length-offset, stereo); } // CD: CDDA audio // CD mode, cdda enabled, not data track, CDC is reading - if ((PicoAHW & PAHW_MCD) && (PicoOpt & POPT_EN_MCD_CDDA) && - !(Pico_mcd->s68k_regs[0x36] & 1) && (Pico_mcd->scd.Status_CDC & 1)) + if ((PicoIn.AHW & PAHW_MCD) && (PicoIn.opt & POPT_EN_MCD_CDDA) + && Pico_mcd->cdda_stream != NULL + && !(Pico_mcd->s68k_regs[0x36] & 1)) { - // note: only 44, 22 and 11 kHz supported, with forced stereo - int index = Pico_mcd->scd.Cur_Track - 1; - - if (Pico_mcd->TOC.Tracks[index].ftype == CT_MP3) - mp3_update(buf32, length, stereo); + if (Pico_mcd->cdda_type == CT_MP3) + mp3_update(buf32, length-offset, stereo); else - cdda_raw_update(buf32, length); + cdda_raw_update(buf32, length-offset, stereo); } - if ((PicoAHW & PAHW_32X) && (PicoOpt & 
POPT_EN_PWM)) - p32x_pwm_update(buf32, length, stereo); + if ((PicoIn.AHW & PAHW_32X) && (PicoIn.opt & POPT_EN_PWM)) + p32x_pwm_update(buf32, length-offset, stereo); // convert + limit to normal 16bit output - PsndMix_32_to_16l(PsndOut+offset, buf32, length); + if (PicoIn.sndOut) + PsndMix_32_to_16(PicoIn.sndOut+(offset<> 3; - int length = PsndLen; - -#if !SIMPLE_WRITE_SOUND - // compensate for float part of PsndLen - PsndLen_exc_cnt += PsndLen_exc_add; - if (PsndLen_exc_cnt >= 0x10000) { - PsndLen_exc_cnt -= 0x10000; - length++; - } -#endif - - // PSG - if (PicoOpt & POPT_EN_PSG) - SN76496Update(PsndOut, length, stereo); - - // upmix to "stereo" if needed - if (stereo) { - int i, *p; - for (i = length, p = (void *)PsndOut; i > 0; i--, p++) - *p |= *p << 16; - } - - if (PicoWriteSound != NULL) - PicoWriteSound(length * ((PicoOpt & POPT_EN_STEREO) ? 4 : 2)); + if (PicoIn.writeSound && PicoIn.sndOut) + PicoIn.writeSound(curr_pos * ((PicoIn.opt & POPT_EN_STEREO) ? 4 : 2)); + // clear sound buffer PsndClear(); } +static int PsndRenderMS(int offset, int length) +{ + s32 *buf32 = PsndBuffer; + int stereo = (PicoIn.opt & 8) >> 3; + int psglen = ((Pico.snd.psg_pos+0x80000) >> 20); + int ym2413len = ((Pico.snd.ym2413_pos+0x80000) >> 20); + + if (!PicoIn.sndOut) + return length; + + pprof_start(sound); + + // Add in parts of the PSG output not yet done + if (length-psglen > 0) { + s16 *psgbuf = PicoIn.sndOut + (psglen << stereo); + Pico.snd.psg_pos += (length-psglen) << 20; + if (PicoIn.opt & POPT_EN_PSG) + SN76496Update(psgbuf, length-psglen, stereo); + } + + if (length-ym2413len > 0) { + s16 *ym2413buf = PicoIn.sndOut + (ym2413len << stereo); + Pico.snd.ym2413_pos += (length-ym2413len) << 20; + int len = (length-ym2413len); + if (Pico.m.hardware & PMS_HW_FMUSED) { + PsndFMUpdate(buf32, len, 0, 0); + if (stereo) + while (len--) { + *ym2413buf++ += *buf32; + *ym2413buf++ += *buf32++; + } + else + while (len--) { + *ym2413buf++ += *buf32++; + } + } + } + + pprof_end(sound); + 
+ return length; +} + +PICO_INTERNAL void PsndGetSamplesMS(int y) +{ + static int curr_pos = 0; + + curr_pos = PsndRenderMS(0, Pico.snd.len_use); + + if (PicoIn.writeSound != NULL && PicoIn.sndOut) + PicoIn.writeSound(curr_pos * ((PicoIn.opt & POPT_EN_STEREO) ? 4 : 2)); + PsndClear(); +} + +// vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/sound/ym2612.c b/pico/sound/ym2612.c index efe5054e..ee84da23 100644 --- a/pico/sound/ym2612.c +++ b/pico/sound/ym2612.c @@ -5,6 +5,9 @@ ** ** SSG-EG was also removed, because it's rarely used, Sega2.doc even does not ** document it ("proprietary") and tells to write 0 to SSG-EG control register. +** +** updated with fixes from mame 0.216 (file version 1.5.1) (kub) +** SSG-EG readded from GenPlus (kub) */ /* @@ -111,6 +114,7 @@ #include #include +#include "../pico_int.h" #include "ym2612.h" #ifndef EXTERNAL_YM2612 @@ -124,7 +128,7 @@ extern YM2612 *ym2612_940; #endif -void memset32(int *dest, int c, int count); +void memset32(void *dest, int c, int count); #ifndef __GNUC__ @@ -136,7 +140,7 @@ void memset32(int *dest, int c, int count); #endif #ifndef INLINE -#define INLINE static __inline +#define INLINE __inline #endif #ifndef M_PI @@ -148,7 +152,7 @@ void memset32(int *dest, int c, int count); #define FREQ_SH 16 /* 16.16 fixed point (frequency calculations) */ #define EG_SH 16 /* 16.16 fixed point (envelope generator timing) */ -#define LFO_SH 25 /* 7.25 fixed point (LFO calculations) */ +#define LFO_SH 24 /* 8.24 fixed point (LFO calculations) */ #define TIMER_SH 16 /* 16.16 fixed point (timers calculations) */ #define ENV_BITS 10 @@ -172,16 +176,6 @@ void memset32(int *dest, int c, int count); #define EG_TIMER_OVERFLOW (3*(1< max ) val = max; \ - else if ( val < min ) val = min; \ -} - - /* TL_TAB_LEN is calculated as: * 13 - sinus amplitude bits (Y axis) * 2 - sinus sign bit (Y axis) @@ -199,6 +193,8 @@ UINT16 ym_tl_tab2[13*TL_RES_LEN]; /* sin waveform table in 'decibel' scale (use only period/4 values) */ static UINT16 
ym_sin_tab[256]; +static int ym_init_tab; + /* sustain level table (3dB per step) */ /* bit0, bit1, bit2, bit3, bit4, bit5, bit6 */ /* 1, 2, 4, 8, 16, 32, 64 (value)*/ @@ -287,7 +283,7 @@ O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18), O(18),O(18),O(18),O(18),O(18),O(18),O(18),O(18), /* rates 00-11 */ -O( 0),O( 1),O( 2),O( 3), +O(18),O(18),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), O( 0),O( 1),O( 2),O( 3), @@ -328,10 +324,10 @@ O(16),O(16),O(16),O(16),O(16),O(16),O(16),O(16) #define O(a) (a*1) static const UINT8 eg_rate_shift[32+64+32]={ /* Envelope Generator counter shifts (32 + 64 rates + 32 RKS) */ /* 32 infinite time rates */ -O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0), -O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0), -O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0), -O(0),O(0),O(0),O(0),O(0),O(0),O(0),O(0), +O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11), +O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11), +O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11), +O(11),O(11),O(11),O(11),O(11),O(11),O(11),O(11), /* rates 00-11 */ O(11),O(11),O(11),O(11), @@ -517,7 +513,7 @@ static INT32 lfo_pm_table[128*8*32]; /* 128 combinations of 7 bits meaningful (o but LFO works with one more bit of a precision so we really need 4096 elements */ static UINT32 fn_table[4096]; /* fnumber->increment counter */ -static int g_lfo_ampm = 0; +static int g_lfo_ampm; /* register number to channel number , slot offset */ #define OPN_CHAN(N) (N&3) @@ -531,7 +527,7 @@ static int g_lfo_ampm = 0; /* OPN Mode Register Write */ -INLINE void set_timers( int v ) +static INLINE void set_timers( int v ) { /* b7 = CSM MODE */ /* b6 = 3 slot mode */ @@ -552,33 +548,58 @@ INLINE void set_timers( int v ) ym2612.OPN.ST.status &= ~1; } +static INLINE void recalc_volout(FM_SLOT *SLOT) +{ + INT16 vol_out = SLOT->volume; + if ((SLOT->ssg&0x0c) == 0x0c) + vol_out = (0x200 - vol_out) & MAX_ATT_INDEX; + SLOT->vol_out = vol_out + SLOT->tl; +} -INLINE void FM_KEYON(int c , int s ) +static INLINE void 
FM_KEYON(int c , int s ) { FM_SLOT *SLOT = &ym2612.CH[c].SLOT[s]; if( !SLOT->key ) { SLOT->key = 1; SLOT->phase = 0; /* restart Phase Generator */ - SLOT->state = EG_ATT; /* phase -> Attack */ + SLOT->ssg ^= SLOT->ssgn; + SLOT->ssgn = 0; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS : EG_DEC; + if (SLOT->ar_ksr < 32+62) { + if (SLOT->volume > MIN_ATT_INDEX) SLOT->state = EG_ATT; + } else { + SLOT->volume = MIN_ATT_INDEX; + } + recalc_volout(SLOT); ym2612.slot_mask |= (1<key ) { SLOT->key = 0; - if (SLOT->state>EG_REL) + if (SLOT->state>EG_REL) { SLOT->state = EG_REL;/* phase -> Release */ + if (SLOT->ssg&0x08) { + if (SLOT->ssg&0x04) + SLOT->volume = (0x200 - SLOT->volume); + if (SLOT->volume >= 0x200) { + SLOT->volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; + } + } + } + SLOT->vol_out = SLOT->volume + SLOT->tl; } } /* set detune & multiple */ -INLINE void set_det_mul(FM_CH *CH, FM_SLOT *SLOT, int v) +static INLINE void set_det_mul(FM_CH *CH, FM_SLOT *SLOT, int v) { SLOT->mul = (v&0x0f)? (v&0x0f)*2 : 1; SLOT->DT = ym2612.OPN.ST.dt_tab[(v>>4)&7]; @@ -586,45 +607,45 @@ INLINE void set_det_mul(FM_CH *CH, FM_SLOT *SLOT, int v) } /* set total level */ -INLINE void set_tl(FM_SLOT *SLOT, int v) +static INLINE void set_tl(FM_SLOT *SLOT, int v) { SLOT->tl = (v&0x7f)<<(ENV_BITS-7); /* 7bit TL */ + if (SLOT->state > EG_REL) + recalc_volout(SLOT); } /* set attack rate & key scale */ -INLINE void set_ar_ksr(FM_CH *CH, FM_SLOT *SLOT, int v) +static INLINE void set_ar_ksr(FM_CH *CH, FM_SLOT *SLOT, int v) { UINT8 old_KSR = SLOT->KSR; + int eg_sh_ar, eg_sel_ar; SLOT->ar = (v&0x1f) ? 
32 + ((v&0x1f)<<1) : 0; + SLOT->ar_ksr = SLOT->ar + SLOT->ksr; SLOT->KSR = 3-(v>>6); if (SLOT->KSR != old_KSR) { CH->SLOT[SLOT1].Incr=-1; } + + /* refresh Attack rate */ + if ((SLOT->ar_ksr) < 32+62) + { + eg_sh_ar = eg_rate_shift [SLOT->ar_ksr]; + eg_sel_ar = eg_rate_select[SLOT->ar_ksr]; + } else { - int eg_sh_ar, eg_sel_ar; - - /* refresh Attack rate */ - if ((SLOT->ar + SLOT->ksr) < 32+62) - { - eg_sh_ar = eg_rate_shift [SLOT->ar + SLOT->ksr ]; - eg_sel_ar = eg_rate_select[SLOT->ar + SLOT->ksr ]; - } - else - { - eg_sh_ar = 0; - eg_sel_ar = 17; - } - - SLOT->eg_pack_ar = eg_inc_pack[eg_sel_ar] | (eg_sh_ar<<24); + eg_sh_ar = 0; + eg_sel_ar = 18; } + + SLOT->eg_pack_ar = eg_inc_pack[eg_sel_ar] | (eg_sh_ar<<24); } /* set decay rate */ -INLINE void set_dr(FM_SLOT *SLOT, int v) +static INLINE void set_dr(FM_SLOT *SLOT, int v) { int eg_sh_d1r, eg_sel_d1r; @@ -637,7 +658,7 @@ INLINE void set_dr(FM_SLOT *SLOT, int v) } /* set sustain rate */ -INLINE void set_sr(FM_SLOT *SLOT, int v) +static INLINE void set_sr(FM_SLOT *SLOT, int v) { int eg_sh_d2r, eg_sel_d2r; @@ -650,12 +671,15 @@ INLINE void set_sr(FM_SLOT *SLOT, int v) } /* set release rate */ -INLINE void set_sl_rr(FM_SLOT *SLOT, int v) +static INLINE void set_sl_rr(FM_SLOT *SLOT, int v) { int eg_sh_rr, eg_sel_rr; SLOT->sl = sl_table[ v>>4 ]; + if (SLOT->state == EG_DEC && (SLOT->volume >= (INT32)(SLOT->sl))) + SLOT->state = EG_SUS; + SLOT->rr = 34 + ((v&0x0f)<<2); eg_sh_rr = eg_rate_shift [SLOT->rr + SLOT->ksr]; @@ -666,7 +690,7 @@ INLINE void set_sl_rr(FM_SLOT *SLOT, int v) -INLINE signed int op_calc(UINT32 phase, unsigned int env, signed int pm) +static INLINE signed int op_calc(UINT32 phase, unsigned int env, signed int pm) { int ret, sin = (phase>>16) + (pm>>1); int neg = sin & 0x200; @@ -683,7 +707,7 @@ INLINE signed int op_calc(UINT32 phase, unsigned int env, signed int pm) return neg ? 
-ret : ret; } -INLINE signed int op_calc1(UINT32 phase, unsigned int env, signed int pm) +static INLINE signed int op_calc1(UINT32 phase, unsigned int env, signed int pm) { int ret, sin = (phase+pm)>>16; int neg = sin & 0x200; @@ -701,7 +725,7 @@ INLINE signed int op_calc1(UINT32 phase, unsigned int env, signed int pm) #if !defined(_ASM_YM2612_C) || defined(EXTERNAL_YM2612) /* advance LFO to next sample */ -INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt) +static INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt) { UINT8 pos; UINT8 prev_pos; @@ -715,12 +739,12 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt) if (prev_pos != pos) { lfo_ampm &= 0xff; - /* triangle */ + /* triangle (inverted) */ /* AM: 0 to 126 step +2, 126 to 0 step -2 */ if (pos<64) - lfo_ampm |= ((pos&63) * 2) << 8; /* 0 - 126 */ + lfo_ampm |= ((pos^63) * 2) << 8; /* 0 - 126 */ else - lfo_ampm |= (126 - (pos&63)*2) << 8; + lfo_ampm |= ((pos&63) * 2) << 8; } else { @@ -739,83 +763,127 @@ INLINE int advance_lfo(int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt) return lfo_ampm; } -#define EG_INC_VAL() \ - ((1 << ((pack >> ((eg_cnt>>shift)&7)*3)&7)) >> 1) - -INLINE UINT32 update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt) +static INLINE void update_eg_phase(FM_SLOT *SLOT, UINT32 eg_cnt, UINT32 ssg_en) { INT32 volume = SLOT->volume; + UINT32 pack = SLOT->eg_pack[SLOT->state - 1]; + UINT32 shift = pack >> 24; + INT32 eg_inc_val; - switch(SLOT->state) - { - case EG_ATT: /* attack phase */ + if (eg_cnt & ((1 << shift) - 1)) + return; + + eg_inc_val = pack >> ((eg_cnt >> shift) & 7) * 3; + eg_inc_val = (1 << (eg_inc_val & 7)) >> 1; + + if ((SLOT->ssg&0x08) && ssg_en) { + switch (SLOT->state) { - UINT32 pack = SLOT->eg_pack_ar; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<> 4; + if ( volume <= MIN_ATT_INDEX ) { - volume += ( ~volume * EG_INC_VAL() ) >>4; - - if (volume <= MIN_ATT_INDEX) - { - volume = MIN_ATT_INDEX; - SLOT->state = 
EG_DEC; - } + volume = MIN_ATT_INDEX; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? EG_SUS: EG_DEC; } break; - } case EG_DEC: /* decay phase */ - { - UINT32 pack = SLOT->eg_pack_d1r; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<= (INT32) SLOT->sl ) - SLOT->state = EG_SUS; - } + if (volume < 0x200) + volume += 4*eg_inc_val; + if ( volume >= (INT32) SLOT->sl ) + SLOT->state = EG_SUS; break; - } case EG_SUS: /* sustain phase */ - { - UINT32 pack = SLOT->eg_pack_d2r; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<= MAX_ATT_INDEX ) - { - volume = MAX_ATT_INDEX; - /* do not change SLOT->state (verified on real chip) */ - } - } + if (volume < 0x200) + volume += 4*eg_inc_val; break; - } case EG_REL: /* release phase */ - { - UINT32 pack = SLOT->eg_pack_rr; - UINT32 shift = pack>>24; - if ( !(eg_cnt & ((1<= 0x200 ) { - volume += EG_INC_VAL(); - - if ( volume >= MAX_ATT_INDEX ) - { - volume = MAX_ATT_INDEX; - SLOT->state = EG_OFF; - } + volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; } break; } - } + SLOT->vol_out = volume + SLOT->tl; + if ((SLOT->ssg&0x04) && (SLOT->state > EG_REL)) + SLOT->vol_out = ((0x200 - volume) & MAX_ATT_INDEX) + SLOT->tl; + } else { + switch (SLOT->state) + { + case EG_ATT: /* attack phase */ + volume += ( ~volume * eg_inc_val ) >> 4; + if ( volume <= MIN_ATT_INDEX ) + { + volume = MIN_ATT_INDEX; + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? 
EG_SUS: EG_DEC; + } + break; + + case EG_DEC: /* decay phase */ + volume += eg_inc_val; + if ( volume >= (INT32) SLOT->sl ) + SLOT->state = EG_SUS; + break; + + case EG_SUS: /* sustain phase */ + volume += eg_inc_val; + if ( volume >= MAX_ATT_INDEX ) + { + volume = MAX_ATT_INDEX; + /* do not change SLOT->state (verified on real chip) */ + } + break; + + case EG_REL: /* release phase */ + volume += eg_inc_val; + if ( volume >= MAX_ATT_INDEX ) + { + volume = MAX_ATT_INDEX; + SLOT->state = EG_OFF; + } + break; + } + + SLOT->vol_out = volume + SLOT->tl; + } SLOT->volume = volume; - return SLOT->tl + ((UINT32)volume); /* tl is 7bit<<3, volume 0-1023 (0-2039 total) */ +} + +static INLINE UINT32 update_ssg_eg_phase(FM_SLOT *SLOT, UINT32 phase) +{ + if (SLOT->ssg&0x01) { + if (SLOT->ssg&0x02) { + SLOT->ssg ^= SLOT->ssgn ^ 4; + SLOT->ssgn = 4; + } + + if (SLOT->state != EG_ATT && !(SLOT->ssg&0x04)) + SLOT->volume = MAX_ATT_INDEX; + } else { + if (SLOT->ssg&0x02) { + SLOT->ssg ^= 4; + SLOT->ssgn ^= 4; + } else + phase = 0; + + if (SLOT->state != EG_ATT) { + SLOT->state = (SLOT->sl == MIN_ATT_INDEX) ? 
EG_SUS : EG_DEC; + if (SLOT->ar_ksr < 32+62) { + if (SLOT->volume > MIN_ATT_INDEX) SLOT->state = EG_ATT; + } else { + SLOT->volume = MIN_ATT_INDEX; + } + } + } + recalc_volout(SLOT); + return phase; } #endif @@ -826,7 +894,8 @@ typedef struct UINT16 vol_out2; UINT16 vol_out3; UINT16 vol_out4; - UINT32 pad[2]; + UINT32 lfo_init_sft16; + UINT32 pad; UINT32 phase1; /* 10 */ UINT32 phase2; UINT32 phase3; @@ -843,7 +912,7 @@ typedef struct UINT32 eg_timer; UINT32 eg_timer_add; UINT32 pack; // 4c: stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16] - UINT32 algo; /* 50: algo[3], was_update */ + UINT32 algo; /* 50: algo[3], was_update, unsued, upd_cnt[2], dac */ INT32 op1_out; #ifdef _MIPS_ARCH_ALLEGREX UINT32 pad1[3+8]; @@ -852,7 +921,209 @@ typedef struct #if !defined(_ASM_YM2612_C) || defined(EXTERNAL_YM2612) -static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) +#include +static int clip(int n) +{ + unsigned b = 14, s = n < 0; + int m = s + INT_MAX; + if (s + (n>>(b-1))) n = m >> (8*sizeof(int)-b); + return n; +} + +static void update_ssg_eg_channel(chan_rend_context *ct) +{ + FM_SLOT *SLOT; + + SLOT = &ct->CH->SLOT[SLOT1]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) + ct->phase1 = update_ssg_eg_phase(SLOT, ct->phase1); + SLOT = &ct->CH->SLOT[SLOT2]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) + ct->phase2 = update_ssg_eg_phase(SLOT, ct->phase2); + SLOT = &ct->CH->SLOT[SLOT3]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) + ct->phase3 = update_ssg_eg_phase(SLOT, ct->phase3); + SLOT = &ct->CH->SLOT[SLOT4]; + if ((SLOT->ssg&0x08) && SLOT->state > EG_REL && SLOT->volume >= 0x200) + ct->phase4 = update_ssg_eg_phase(SLOT, ct->phase4); +} + +static void update_eg_phase_channel(chan_rend_context *ct) +{ + FM_SLOT *SLOT; + + SLOT = &ct->CH->SLOT[SLOT1]; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, 
ct->pack & 2); + SLOT = &ct->CH->SLOT[SLOT2]; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); + SLOT = &ct->CH->SLOT[SLOT3]; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); + SLOT = &ct->CH->SLOT[SLOT4]; + if (SLOT->state != EG_OFF) update_eg_phase(SLOT, ct->eg_cnt, ct->pack & 2); +} + +static int update_algo_channel(chan_rend_context *ct, unsigned int eg_out, unsigned int eg_out2, unsigned int eg_out4) +{ + int m2,c1,c2=0; /* Phase Modulation input for operators 2,3,4 */ + int smp = 0; + + switch( ct->algo&0x7 ) + { + case 0: + { + /* M1---C1---MEM---M2---C2---OUT */ + m2 = ct->mem; + c1 = ct->op1_out>>16; + if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ + ct->mem = op_calc(ct->phase2, eg_out2, c1); + } + else ct->mem = 0; + + if( eg_out < ENV_QUIET ) { /* SLOT 3 */ + c2 = op_calc(ct->phase3, eg_out, m2); + } + if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ + smp = op_calc(ct->phase4, eg_out4, c2); + } + break; + } + case 1: + { + /* M1------+-MEM---M2---C2---OUT */ + /* C1-+ */ + m2 = ct->mem; + ct->mem = ct->op1_out>>16; + if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ + ct->mem+= op_calc(ct->phase2, eg_out2, 0); + } + + if( eg_out < ENV_QUIET ) { /* SLOT 3 */ + c2 = op_calc(ct->phase3, eg_out, m2); + } + if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ + smp = op_calc(ct->phase4, eg_out4, c2); + } + break; + } + case 2: + { + /* M1-----------------+-C2---OUT */ + /* C1---MEM---M2-+ */ + m2 = ct->mem; + c2 = ct->op1_out>>16; + if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ + ct->mem = op_calc(ct->phase2, eg_out2, 0); + } + else ct->mem = 0; + + if( eg_out < ENV_QUIET ) { /* SLOT 3 */ + c2 += op_calc(ct->phase3, eg_out, m2); + } + if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ + smp = op_calc(ct->phase4, eg_out4, c2); + } + break; + } + case 3: + { + /* M1---C1---MEM------+-C2---OUT */ + /* M2-+ */ + c2 = ct->mem; + c1 = ct->op1_out>>16; + if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ + ct->mem = op_calc(ct->phase2, eg_out2, c1); + } + else 
ct->mem = 0; + + if( eg_out < ENV_QUIET ) { /* SLOT 3 */ + c2 += op_calc(ct->phase3, eg_out, 0); + } + if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ + smp = op_calc(ct->phase4, eg_out4, c2); + } + break; + } + case 4: + { + /* M1---C1-+-OUT */ + /* M2---C2-+ */ + /* MEM: not used */ + + c1 = ct->op1_out>>16; + if( eg_out < ENV_QUIET ) { /* SLOT 3 */ + c2 = op_calc(ct->phase3, eg_out, 0); + } + if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ + smp = op_calc(ct->phase2, eg_out2, c1); + } + if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ + smp+= op_calc(ct->phase4, eg_out4, c2); + } + break; + } + case 5: + { + /* +----C1----+ */ + /* M1-+-MEM---M2-+-OUT */ + /* +----C2----+ */ + m2 = ct->mem; + ct->mem = c1 = c2 = ct->op1_out>>16; + + if( eg_out < ENV_QUIET ) { /* SLOT 3 */ + smp = op_calc(ct->phase3, eg_out, m2); + } + if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ + smp+= op_calc(ct->phase2, eg_out2, c1); + } + if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ + smp+= op_calc(ct->phase4, eg_out4, c2); + } + break; + } + case 6: + { + /* M1---C1-+ */ + /* M2-+-OUT */ + /* C2-+ */ + /* MEM: not used */ + + c1 = ct->op1_out>>16; + if( eg_out < ENV_QUIET ) { /* SLOT 3 */ + smp = op_calc(ct->phase3, eg_out, 0); + } + if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ + smp+= op_calc(ct->phase2, eg_out2, c1); + } + if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ + smp+= op_calc(ct->phase4, eg_out4, 0); + } + break; + } + case 7: + { + /* M1-+ */ + /* C1-+-OUT */ + /* M2-+ */ + /* C2-+ */ + /* MEM: not used*/ + + smp = ct->op1_out>>16; + if( eg_out < ENV_QUIET ) { /* SLOT 3 */ + smp += op_calc(ct->phase3, eg_out, 0); + } + if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ + smp += op_calc(ct->phase2, eg_out2, 0); + } + if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ + smp += op_calc(ct->phase4, eg_out4, 0); + } + break; + } + } + return smp; +} + +static void chan_render_loop(chan_rend_context *ct, s32 *buffer, int length) { int scounter; /* sample counter */ @@ -862,34 +1133,47 @@ static void chan_render_loop(chan_rend_context *ct, int 
*buffer, int length) int smp = 0; /* produced sample */ unsigned int eg_out, eg_out2, eg_out4; + ct->eg_timer += ct->eg_timer_add; + + while (ct->eg_timer >= 1<eg_timer -= 1<pack & 2) + update_ssg_eg_channel(ct); + + if (ct->algo & 0x30) + ct->algo -= 0x10; + if (!(ct->algo & 0x30)) { + ct->algo |= 0x30; + ct->eg_cnt++; + if (ct->eg_cnt >= 4096) ct->eg_cnt = 1; + + update_eg_phase_channel(ct); + } + } + + ct->vol_out1 = ct->CH->SLOT[SLOT1].vol_out; + ct->vol_out2 = ct->CH->SLOT[SLOT2].vol_out; + ct->vol_out3 = ct->CH->SLOT[SLOT3].vol_out; + ct->vol_out4 = ct->CH->SLOT[SLOT4].vol_out; + + if (ct->pack & 4) goto disabled; /* output disabled */ + if (ct->pack & 8) { /* LFO enabled ? (test Earthworm Jim in between demo 1 and 2) */ ct->pack = (ct->pack&0xffff) | (advance_lfo(ct->pack >> 16, ct->lfo_cnt, ct->lfo_cnt + ct->lfo_inc) << 16); ct->lfo_cnt += ct->lfo_inc; } - ct->eg_timer += ct->eg_timer_add; - while (ct->eg_timer >= EG_TIMER_OVERFLOW) - { - ct->eg_timer -= EG_TIMER_OVERFLOW; - ct->eg_cnt++; - - if (ct->CH->SLOT[SLOT1].state != EG_OFF) ct->vol_out1 = update_eg_phase(&ct->CH->SLOT[SLOT1], ct->eg_cnt); - if (ct->CH->SLOT[SLOT2].state != EG_OFF) ct->vol_out2 = update_eg_phase(&ct->CH->SLOT[SLOT2], ct->eg_cnt); - if (ct->CH->SLOT[SLOT3].state != EG_OFF) ct->vol_out3 = update_eg_phase(&ct->CH->SLOT[SLOT3], ct->eg_cnt); - if (ct->CH->SLOT[SLOT4].state != EG_OFF) ct->vol_out4 = update_eg_phase(&ct->CH->SLOT[SLOT4], ct->eg_cnt); - } - - if (ct->pack & 4) continue; /* output disabled */ - /* calculate channel sample */ eg_out = ct->vol_out1; - if ( (ct->pack & 8) && (ct->pack&(1<<(SLOT1+8))) ) eg_out += ct->pack >> (((ct->pack&0xc0)>>6)+24); + if ( (ct->pack & 8) && (ct->pack&(1<<(SLOT1+8))) ) + eg_out += ct->pack >> (((ct->pack&0xc0)>>6)+24); if( eg_out < ENV_QUIET ) /* SLOT 1 */ { int out = 0; - if (ct->pack&0xf000) out = ((ct->op1_out>>16) + ((ct->op1_out<<16)>>16)) << ((ct->pack&0xf000)>>12); /* op1_out0 + op1_out1 */ + if (ct->pack&0xf000) out = ((ct->op1_out + 
(ct->op1_out<<16))>>16) << ((ct->pack&0xf000)>>12); /* op1_out0 + op1_out1 */ ct->op1_out <<= 16; ct->op1_out |= (unsigned short)op_calc1(ct->phase1, eg_out, out); } else { @@ -907,162 +1191,22 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) if (ct->pack & (1<<(SLOT4+8))) eg_out4 += add; } - switch( ct->CH->ALGO ) - { - case 0: - { - /* M1---C1---MEM---M2---C2---OUT */ - int m2,c1,c2=0; /* Phase Modulation input for operators 2,3,4 */ - m2 = ct->mem; - c1 = ct->op1_out>>16; - if( eg_out < ENV_QUIET ) { /* SLOT 3 */ - c2 = op_calc(ct->phase3, eg_out, m2); - } - if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ - ct->mem = op_calc(ct->phase2, eg_out2, c1); - } - else ct->mem = 0; - if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ - smp = op_calc(ct->phase4, eg_out4, c2); - } - break; - } - case 1: - { - /* M1------+-MEM---M2---C2---OUT */ - /* C1-+ */ - int m2,c2=0; - m2 = ct->mem; - ct->mem = ct->op1_out>>16; - if( eg_out < ENV_QUIET ) { /* SLOT 3 */ - c2 = op_calc(ct->phase3, eg_out, m2); - } - if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ - ct->mem+= op_calc(ct->phase2, eg_out2, 0); - } - if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ - smp = op_calc(ct->phase4, eg_out4, c2); - } - break; - } - case 2: - { - /* M1-----------------+-C2---OUT */ - /* C1---MEM---M2-+ */ - int m2,c2; - m2 = ct->mem; - c2 = ct->op1_out>>16; - if( eg_out < ENV_QUIET ) { /* SLOT 3 */ - c2 += op_calc(ct->phase3, eg_out, m2); - } - if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ - ct->mem = op_calc(ct->phase2, eg_out2, 0); - } - else ct->mem = 0; - if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ - smp = op_calc(ct->phase4, eg_out4, c2); - } - break; - } - case 3: - { - /* M1---C1---MEM------+-C2---OUT */ - /* M2-+ */ - int c1,c2; - c2 = ct->mem; - c1 = ct->op1_out>>16; - if( eg_out < ENV_QUIET ) { /* SLOT 3 */ - c2 += op_calc(ct->phase3, eg_out, 0); - } - if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ - ct->mem = op_calc(ct->phase2, eg_out2, c1); - } - else ct->mem = 0; - if( eg_out4 < ENV_QUIET ) { /* 
SLOT 4 */ - smp = op_calc(ct->phase4, eg_out4, c2); - } - break; - } - case 4: - { - /* M1---C1-+-OUT */ - /* M2---C2-+ */ - /* MEM: not used */ - int c1,c2=0; - c1 = ct->op1_out>>16; - if( eg_out < ENV_QUIET ) { /* SLOT 3 */ - c2 = op_calc(ct->phase3, eg_out, 0); - } - if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ - smp = op_calc(ct->phase2, eg_out2, c1); - } - if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ - smp+= op_calc(ct->phase4, eg_out4, c2); - } - break; - } - case 5: - { - /* +----C1----+ */ - /* M1-+-MEM---M2-+-OUT */ - /* +----C2----+ */ - int m2,c1,c2; - m2 = ct->mem; - ct->mem = c1 = c2 = ct->op1_out>>16; - if( eg_out < ENV_QUIET ) { /* SLOT 3 */ - smp = op_calc(ct->phase3, eg_out, m2); - } - if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ - smp+= op_calc(ct->phase2, eg_out2, c1); - } - if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ - smp+= op_calc(ct->phase4, eg_out4, c2); - } - break; - } - case 6: - { - /* M1---C1-+ */ - /* M2-+-OUT */ - /* C2-+ */ - /* MEM: not used */ - int c1; - c1 = ct->op1_out>>16; - if( eg_out < ENV_QUIET ) { /* SLOT 3 */ - smp = op_calc(ct->phase3, eg_out, 0); - } - if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ - smp+= op_calc(ct->phase2, eg_out2, c1); - } - if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ - smp+= op_calc(ct->phase4, eg_out4, 0); - } - break; - } - case 7: - { - /* M1-+ */ - /* C1-+-OUT */ - /* M2-+ */ - /* C2-+ */ - /* MEM: not used*/ - smp = ct->op1_out>>16; - if( eg_out < ENV_QUIET ) { /* SLOT 3 */ - smp += op_calc(ct->phase3, eg_out, 0); - } - if( eg_out2 < ENV_QUIET ) { /* SLOT 2 */ - smp += op_calc(ct->phase2, eg_out2, 0); - } - if( eg_out4 < ENV_QUIET ) { /* SLOT 4 */ - smp += op_calc(ct->phase4, eg_out4, 0); - } - break; - } - } + smp = update_algo_channel(ct, eg_out, eg_out2, eg_out4); /* done calculating channel sample */ +disabled: + /* update phase counters AFTER output calculations */ + ct->phase1 += ct->incr1; + ct->phase2 += ct->incr2; + ct->phase3 += ct->incr3; + ct->phase4 += ct->incr4; /* mix sample to output buffer */ if (smp) 
{ + smp = clip(smp); /* saturate to 14 bit */ + if (ct->algo & 0x80) { + smp &= ~0x1f; /* drop bits (DAC has 9 bits) */ + smp -= (smp < 0 ? 7:0) << 5; /* discontinuity */ + } if (ct->pack & 1) { /* stereo */ if (ct->pack & 0x20) /* L */ /* TODO: check correctness */ buffer[scounter*2] += smp; @@ -1071,18 +1215,12 @@ static void chan_render_loop(chan_rend_context *ct, int *buffer, int length) } else { buffer[scounter] += smp; } - ct->algo = 8; // algo is only used in asm, here only bit3 is used + ct->algo |= 8; } - - /* update phase counters AFTER output calculations */ - ct->phase1 += ct->incr1; - ct->phase2 += ct->incr2; - ct->phase3 += ct->incr3; - ct->phase4 += ct->incr4; } } #else -void chan_render_loop(chan_rend_context *ct, int *buffer, unsigned short length); +void chan_render_loop(chan_rend_context *ct, s32 *buffer, unsigned short length); #endif static chan_rend_context crct; @@ -1090,28 +1228,55 @@ static chan_rend_context crct; static void chan_render_prep(void) { crct.eg_timer_add = ym2612.OPN.eg_timer_add; + crct.lfo_init_sft16 = g_lfo_ampm << 16; crct.lfo_inc = ym2612.OPN.lfo_inc; } -static void chan_render_finish(void) +static void chan_render_finish(s32 *buffer, int length, int active_chans) { ym2612.OPN.eg_cnt = crct.eg_cnt; ym2612.OPN.eg_timer = crct.eg_timer; - g_lfo_ampm = crct.pack >> 16; // need_save - ym2612.OPN.lfo_cnt = crct.lfo_cnt; + ym2612.OPN.lfo_cnt += ym2612.OPN.lfo_inc * length; } -static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: stereo, ?, disabled, ?, pan_r, pan_l +static UINT32 update_lfo_phase(const FM_SLOT *SLOT, UINT32 block_fnum) +{ + UINT32 fnum_lfo; + INT32 lfo_fn_table_index_offset; + UINT8 blk; + UINT32 fn; + int fc,fdt; + + fnum_lfo = ((block_fnum & 0x7f0) >> 4) * 32 * 8; + lfo_fn_table_index_offset = lfo_pm_table[ fnum_lfo + crct.CH->pms + ((crct.pack>>16)&0xff) ]; + if (lfo_fn_table_index_offset) /* LFO phase modulation active */ + { + block_fnum = block_fnum*2 + lfo_fn_table_index_offset; 
+ blk = (block_fnum&0x7000) >> 12; + fn = block_fnum & 0xfff; + + /* phase increment counter */ + fc = (fn_table[fn]>>(7-blk)); + + fdt = fc + SLOT->DT[crct.CH->kcode]; + if (fdt < 0) fdt += fn_table[0x7ff*2] >> 2; + + return (fdt * SLOT->mul) >> 1; + } else + return SLOT->Incr; +} + +static int chan_render(s32 *buffer, int length, int c, UINT32 flags) // flags: stereo, ?, disabled, ?, pan_r, pan_l { crct.CH = &ym2612.CH[c]; crct.mem = crct.CH->mem_value; /* one sample delay memory */ crct.lfo_cnt = ym2612.OPN.lfo_cnt; - flags &= 0x35; + flags &= 0x37; if (crct.lfo_inc) { flags |= 8; - flags |= g_lfo_ampm << 16; + flags |= crct.lfo_init_sft16; flags |= crct.CH->AMmasks << 8; if (crct.CH->ams == 8) // no ams flags &= ~0xf00; @@ -1129,51 +1294,25 @@ static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: s crct.phase3 = crct.CH->SLOT[SLOT3].phase; crct.phase4 = crct.CH->SLOT[SLOT4].phase; - /* current output from EG circuit (without AM from LFO) */ - crct.vol_out1 = crct.CH->SLOT[SLOT1].tl + ((UINT32)crct.CH->SLOT[SLOT1].volume); - crct.vol_out2 = crct.CH->SLOT[SLOT2].tl + ((UINT32)crct.CH->SLOT[SLOT2].volume); - crct.vol_out3 = crct.CH->SLOT[SLOT3].tl + ((UINT32)crct.CH->SLOT[SLOT3].volume); - crct.vol_out4 = crct.CH->SLOT[SLOT4].tl + ((UINT32)crct.CH->SLOT[SLOT4].volume); - crct.op1_out = crct.CH->op1_out; crct.algo = crct.CH->ALGO & 7; + crct.algo |= crct.CH->upd_cnt << 4; + if (ym2612.OPN.ST.flags & ST_DAC) + crct.algo |= 0x80; - if(crct.CH->pms) + if(crct.CH->pms && (ym2612.OPN.ST.mode & 0xC0) && c == 2) { + /* 3 slot mode */ + crct.incr1 = update_lfo_phase(&crct.CH->SLOT[SLOT1], ym2612.OPN.SL3.block_fnum[1]); + crct.incr2 = update_lfo_phase(&crct.CH->SLOT[SLOT2], ym2612.OPN.SL3.block_fnum[2]); + crct.incr3 = update_lfo_phase(&crct.CH->SLOT[SLOT3], ym2612.OPN.SL3.block_fnum[0]); + crct.incr4 = update_lfo_phase(&crct.CH->SLOT[SLOT4], crct.CH->block_fnum); + } + else if(crct.CH->pms) { - /* add support for 3 slot mode */ - UINT32 block_fnum 
= crct.CH->block_fnum; - - UINT32 fnum_lfo = ((block_fnum & 0x7f0) >> 4) * 32 * 8; - INT32 lfo_fn_table_index_offset = lfo_pm_table[ fnum_lfo + crct.CH->pms + ((crct.pack>>16)&0xff) ]; - - if (lfo_fn_table_index_offset) /* LFO phase modulation active */ - { - UINT8 blk; - UINT32 fn; - int kc,fc; - - blk = block_fnum >> 11; - block_fnum = block_fnum*2 + lfo_fn_table_index_offset; - - fn = block_fnum & 0xfff; - - /* keyscale code */ - kc = (blk<<2) | opn_fktable[fn >> 8]; - /* phase increment counter */ - fc = fn_table[fn]>>(7-blk); - - crct.incr1 = ((fc+crct.CH->SLOT[SLOT1].DT[kc])*crct.CH->SLOT[SLOT1].mul) >> 1; - crct.incr2 = ((fc+crct.CH->SLOT[SLOT2].DT[kc])*crct.CH->SLOT[SLOT2].mul) >> 1; - crct.incr3 = ((fc+crct.CH->SLOT[SLOT3].DT[kc])*crct.CH->SLOT[SLOT3].mul) >> 1; - crct.incr4 = ((fc+crct.CH->SLOT[SLOT4].DT[kc])*crct.CH->SLOT[SLOT4].mul) >> 1; - } - else /* LFO phase modulation = zero */ - { - crct.incr1 = crct.CH->SLOT[SLOT1].Incr; - crct.incr2 = crct.CH->SLOT[SLOT2].Incr; - crct.incr3 = crct.CH->SLOT[SLOT3].Incr; - crct.incr4 = crct.CH->SLOT[SLOT4].Incr; - } + crct.incr1 = update_lfo_phase(&crct.CH->SLOT[SLOT1], crct.CH->block_fnum); + crct.incr2 = update_lfo_phase(&crct.CH->SLOT[SLOT2], crct.CH->block_fnum); + crct.incr3 = update_lfo_phase(&crct.CH->SLOT[SLOT3], crct.CH->block_fnum); + crct.incr4 = update_lfo_phase(&crct.CH->SLOT[SLOT4], crct.CH->block_fnum); } else /* no LFO phase modulation */ { @@ -1196,12 +1335,13 @@ static int chan_render(int *buffer, int length, int c, UINT32 flags) // flags: s } else ym2612.slot_mask &= ~(0xf << (c*4)); + crct.CH->upd_cnt = (crct.algo >> 4) & 0x7; return (crct.algo & 8) >> 3; // had output } /* update phase increment and envelope generator */ -INLINE void refresh_fc_eg_slot(FM_SLOT *SLOT, int fc, int kc) +static INLINE void refresh_fc_eg_slot(FM_SLOT *SLOT, int fc, int kc) { int ksr, fdt; @@ -1217,17 +1357,18 @@ INLINE void refresh_fc_eg_slot(FM_SLOT *SLOT, int fc, int kc) { int eg_sh, eg_sel; SLOT->ksr = ksr; + 
SLOT->ar_ksr = SLOT->ar + ksr; /* calculate envelope generator rates */ - if ((SLOT->ar + ksr) < 32+62) + if ((SLOT->ar_ksr) < 32+62) { - eg_sh = eg_rate_shift [SLOT->ar + ksr ]; - eg_sel = eg_rate_select[SLOT->ar + ksr ]; + eg_sh = eg_rate_shift [SLOT->ar_ksr]; + eg_sel = eg_rate_select[SLOT->ar_ksr]; } else { eg_sh = 0; - eg_sel = 17; + eg_sel = 18; } SLOT->eg_pack_ar = eg_inc_pack[eg_sel] | (eg_sh<<24); @@ -1250,7 +1391,7 @@ INLINE void refresh_fc_eg_slot(FM_SLOT *SLOT, int fc, int kc) } /* update phase increment counters */ -INLINE void refresh_fc_eg_chan(FM_CH *CH) +static INLINE void refresh_fc_eg_chan(FM_CH *CH) { if( CH->SLOT[SLOT1].Incr==-1){ int fc = CH->fc; @@ -1262,7 +1403,7 @@ INLINE void refresh_fc_eg_chan(FM_CH *CH) } } -INLINE void refresh_fc_eg_chan_sl3(void) +static INLINE void refresh_fc_eg_chan_sl3(void) { if( ym2612.CH[2].SLOT[SLOT1].Incr==-1) { @@ -1282,7 +1423,7 @@ static void init_timetables(const UINT8 *dttable) /* DeTune table */ for (d = 0;d <= 3;d++){ for (i = 0;i <= 31;i++){ - rate = ((double)dttable[d*32 + i]) * SIN_LEN * ym2612.OPN.ST.freqbase * (1<ssg = v&0x0f; + SLOT->ssg ^= SLOT->ssgn; + if (v&0x08) ym2612.ssg_mask |= 1<<(OPN_SLOT(r) + c*4); + else ym2612.ssg_mask &= ~(1<<(OPN_SLOT(r) + c*4)); + if (SLOT->state > EG_REL) + recalc_volout(SLOT); break; case 0xa0: switch( OPN_SLOT(r) ){ case 0: /* 0xa0-0xa2 : FNUM1 | depends on fn_h (below) */ { - UINT32 fn = (((UINT32)( (CH->fn_h)&7))<<8) + v; - UINT8 blk = CH->fn_h>>3; + UINT32 fn = ((UINT32)(ym2612.OPN.ST.fn_h & 7) << 8) | v; + UINT8 blk = ym2612.OPN.ST.fn_h >> 3; /* keyscale code */ CH->kcode = (blk<<2) | opn_fktable[fn >> 7]; /* phase increment counter */ @@ -1533,7 +1690,7 @@ static int OPNWriteReg(int r, int v) } break; case 1: /* 0xa4-0xa6 : FNUM2,BLK */ - CH->fn_h = v&0x3f; + ym2612.OPN.ST.fn_h = v & 0x3f; ret = 0; break; case 2: /* 0xa8-0xaa : 3CH FNUM1 */ @@ -1603,10 +1760,11 @@ static int OPNWriteReg(int r, int v) 
/*******************************************************************************/ /* Generate samples for YM2612 */ -int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty) +int YM2612UpdateOne_(s32 *buffer, int length, int stereo, int is_buf_empty) { int pan; int active_chs = 0; + int flags = stereo ? 1:0; // if !is_buf_empty, it means it has valid samples to mix with, else it may contain trash if (is_buf_empty) memset32(buffer, 0, length<>2)) << 3; - if (ym2612.slot_mask & 0x0f0000) active_chs |= chan_render(buffer, length, 4, stereo|((pan&0x300)>>4)) << 4; - if (ym2612.slot_mask & 0xf00000) active_chs |= chan_render(buffer, length, 5, stereo|((pan&0xc00)>>6)|(ym2612.dacen<<2)) << 5; - chan_render_finish(); +#define BIT_IF(v,b,c) { v &= ~(1<<(b)); if (c) v |= 1<<(b); } + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00000f) && (ym2612.OPN.ST.flags & 1)); + if (ym2612.slot_mask & 0x00000f) active_chs |= chan_render(buffer, length, 0, flags|((pan&0x003)<<4)) << 0; + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0000f0) && (ym2612.OPN.ST.flags & 1)); + if (ym2612.slot_mask & 0x0000f0) active_chs |= chan_render(buffer, length, 1, flags|((pan&0x00c)<<2)) << 1; + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x000f00) && (ym2612.OPN.ST.flags & 1)); + if (ym2612.slot_mask & 0x000f00) active_chs |= chan_render(buffer, length, 2, flags|((pan&0x030) )) << 2; + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x00f000) && (ym2612.OPN.ST.flags & 1)); + if (ym2612.slot_mask & 0x00f000) active_chs |= chan_render(buffer, length, 3, flags|((pan&0x0c0)>>2)) << 3; + BIT_IF(flags, 1, (ym2612.ssg_mask & 0x0f0000) && (ym2612.OPN.ST.flags & 1)); + if (ym2612.slot_mask & 0x0f0000) active_chs |= chan_render(buffer, length, 4, flags|((pan&0x300)>>4)) << 4; + g_lfo_ampm = crct.pack >> 16; // need_save; now because ch5 might skip updating it + BIT_IF(flags, 1, (ym2612.ssg_mask & 0xf00000) && (ym2612.OPN.ST.flags & 1)); + if (ym2612.slot_mask & 0xf00000) active_chs |= chan_render(buffer, length, 5, 
flags|((pan&0xc00)>>6)|(!!ym2612.dacen<<2)) << 5; +#undef BIT_IF + chan_render_finish(buffer, length, active_chs); return active_chs; // 1 if buffer updated } /* initialize YM2612 emulator */ -void YM2612Init_(int clock, int rate) +void YM2612Init_(int clock, int rate, int flags) { memset(&ym2612, 0, sizeof(ym2612)); init_tables(); ym2612.OPN.ST.clock = clock; ym2612.OPN.ST.rate = rate; + ym2612.OPN.ST.flags = flags; OPNSetPres( 6*24 ); @@ -1687,6 +1854,9 @@ void YM2612ResetChip_(void) ym2612.OPN.eg_timer = 0; ym2612.OPN.eg_cnt = 0; + ym2612.OPN.lfo_inc = 0; + ym2612.OPN.lfo_cnt = 0; + g_lfo_ampm = 126 << 8; ym2612.OPN.ST.status = 0; reset_channels( &ym2612.CH[0] ); @@ -1705,6 +1875,7 @@ void YM2612ResetChip_(void) for(i = 0x26 ; i >= 0x20 ; i-- ) OPNWriteReg(i,0); /* DAC mode clear */ ym2612.dacen = 0; + ym2612.dacout = 0; ym2612.addr_A1 = 0; } @@ -1719,22 +1890,20 @@ int YM2612Write_(unsigned int a, unsigned int v) v &= 0xff; /* adjust to 8 bit bus */ - switch( a&3){ + switch( a & 3 ){ case 0: /* address port 0 */ + case 2: /* address port 1 */ + /* reminder: this is not used, see ym2612_write_local() */ ym2612.OPN.ST.address = v; - ym2612.addr_A1 = 0; - ret=0; + ym2612.addr_A1 = (a & 2) >> 1; + ret = 0; break; - case 1: /* data port 0 */ - if (ym2612.addr_A1 != 0) { - ret=0; - break; /* verified on real YM2608 */ - } + case 1: + case 3: /* data port */ + addr = ym2612.OPN.ST.address | ((int)ym2612.addr_A1 << 8); - addr = ym2612.OPN.ST.address; - - switch( addr & 0xf0 ) + switch( addr & 0x1f0 ) { case 0x20: /* 0x20-0x2f Mode */ switch( addr ) @@ -1747,6 +1916,8 @@ int YM2612Write_(unsigned int a, unsigned int v) else { ym2612.OPN.lfo_inc = 0; + ym2612.OPN.lfo_cnt = 0; + g_lfo_ampm = 126 << 8; } break; #if 0 // handled elsewhere @@ -1816,23 +1987,6 @@ int YM2612Write_(unsigned int a, unsigned int v) ret = OPNWriteReg(addr,v); } break; - - case 2: /* address port 1 */ - ym2612.OPN.ST.address = v; - ym2612.addr_A1 = 1; - ret=0; - break; - - case 3: /* data port 1 */ 
- if (ym2612.addr_A1 != 1) { - ret=0; - break; /* verified on real YM2608 */ - } - - addr = ym2612.OPN.ST.address | 0x100; - - ret = OPNWriteReg(addr, v); - break; } return ret; @@ -1895,7 +2049,7 @@ typedef struct UINT32 eg_timer; UINT32 lfo_cnt; UINT16 lfo_ampm; - UINT16 unused2; + INT16 busy_timer; UINT32 keyon_field; // 20 UINT32 kcode_fc_sl3_3; UINT32 reserved[2]; @@ -1909,7 +2063,7 @@ typedef struct } ym_save_addon2; -void YM2612PicoStateSave2(int tat, int tbt) +void YM2612PicoStateSave2(int tat, int tbt, int busy) { ym_save_addon_slot ss; ym_save_addon2 sa2; @@ -1967,10 +2121,11 @@ void YM2612PicoStateSave2(int tat, int tbt) sa.eg_timer = ym2612.OPN.eg_timer; sa.lfo_cnt = ym2612.OPN.lfo_cnt; sa.lfo_ampm = g_lfo_ampm; + sa.busy_timer = busy; memcpy(ptr, &sa, sizeof(sa)); // 0x30 max } -int YM2612PicoStateLoad2(int *tat, int *tbt) +int YM2612PicoStateLoad2(int *tat, int *tbt, int *busy) { ym_save_addon_slot ss; ym_save_addon2 sa2; @@ -1996,6 +2151,7 @@ int YM2612PicoStateLoad2(int *tat, int *tbt) g_lfo_ampm = sa.lfo_ampm; if (tat != NULL) *tat = sa.TAT; if (tbt != NULL) *tbt = sa.TBT; + if (busy != NULL) *busy = sa.busy_timer; // chans 1,2,3 ptr = &ym2612.REGS[0x0b8]; @@ -2016,6 +2172,7 @@ int YM2612PicoStateLoad2(int *tat, int *tbt) blk = ym2612.CH[c].block_fnum >> 11; ym2612.CH[c].kcode= (blk<<2) | opn_fktable[fn >> 7]; ym2612.CH[c].fc = fn_table[fn*2]>>(7-blk); + refresh_fc_eg_chan( &ym2612.CH[c] ); } // chans 4,5,6 ptr = &ym2612.REGS[0x1b8]; @@ -2036,6 +2193,7 @@ int YM2612PicoStateLoad2(int *tat, int *tbt) blk = ym2612.CH[c].block_fnum >> 11; ym2612.CH[c].kcode= (blk<<2) | opn_fktable[fn >> 7]; ym2612.CH[c].fc = fn_table[fn*2]>>(7-blk); + refresh_fc_eg_chan( &ym2612.CH[c] ); } for (c = 0; c < 3; c++) { diff --git a/pico/sound/ym2612.h b/pico/sound/ym2612.h index f5e98a05..56ec5ef9 100644 --- a/pico/sound/ym2612.h +++ b/pico/sound/ym2612.h @@ -6,15 +6,16 @@ #define _H_FM_FM_ /* compiler dependence */ +#include "../pico_types.h" #ifndef UINT8 -typedef 
unsigned char UINT8; /* unsigned 8bit */ -typedef unsigned short UINT16; /* unsigned 16bit */ -typedef unsigned int UINT32; /* unsigned 32bit */ +typedef u8 UINT8; /* unsigned 8bit */ +typedef u16 UINT16; /* unsigned 16bit */ +typedef u32 UINT32; /* unsigned 32bit */ #endif #ifndef INT8 -typedef signed char INT8; /* signed 8bit */ -typedef signed short INT16; /* signed 16bit */ -typedef signed int INT32; /* signed 32bit */ +typedef s8 INT8; /* signed 8bit */ +typedef s16 INT16; /* signed 16bit */ +typedef s32 INT32; /* signed 32bit */ #endif #if 1 @@ -43,10 +44,22 @@ typedef struct INT16 volume; /* #0x1a envelope counter | need_save */ UINT32 sl; /* #0x1c sustain level:sl_table[SL] */ - UINT32 eg_pack_ar; /* #0x20 (attack state) */ - UINT32 eg_pack_d1r; /* #0x24 (decay state) */ - UINT32 eg_pack_d2r; /* #0x28 (sustain state) */ - UINT32 eg_pack_rr; /* #0x2c (release state) */ + /* asm relies on this order: */ + union { + struct { + UINT32 eg_pack_rr; /* #0x20 1 (release state) */ + UINT32 eg_pack_d2r; /* #0x24 2 (sustain state) */ + UINT32 eg_pack_d1r; /* #0x28 3 (decay state) */ + UINT32 eg_pack_ar; /* #0x2c 4 (attack state) */ + }; + UINT32 eg_pack[4]; + }; + + UINT8 ssg; /* 0x30 SSG-EG waveform */ + UINT8 ssgn; + UINT16 ar_ksr; /* 0x32 ar+ksr */ + UINT16 vol_out; /* 0x34 current output from EG (without LFO) */ + UINT16 pad; } FM_SLOT; @@ -65,8 +78,8 @@ typedef struct UINT8 ams; /* channel AMS */ UINT8 kcode; /* +11 key code: */ - UINT8 fn_h; /* freq latch */ UINT8 pad2; + UINT8 upd_cnt; /* eg update counter */ UINT32 fc; /* fnum,blk:adjusted to sample rate */ UINT32 block_fnum; /* current blk/fnum value for this slot (can be different betweeen slots of one channel in 3slot mode) */ @@ -83,18 +96,22 @@ typedef struct UINT8 address; /* 10 address register | need_save */ UINT8 status; /* 11 status flag | need_save */ UINT8 mode; /* mode CSM / 3SLOT */ - UINT8 pad; + UINT8 flags; /* operational flags */ int TA; /* timer a */ int TAC; /* timer a maxval */ int TAT; /* 
timer a ticker | need_save */ UINT8 TB; /* timer b */ - UINT8 pad2[3]; + UINT8 fn_h; /* freq latch */ + UINT8 pad2[2]; int TBC; /* timer b maxval */ int TBT; /* timer b ticker | need_save */ /* local time tables */ INT32 dt_tab[8][32];/* DeTune table */ } FM_ST; +#define ST_SSG 1 +#define ST_DAC 2 + /***********************************************************/ /* OPN unit */ /***********************************************************/ @@ -141,6 +158,7 @@ typedef struct FM_OPN OPN; /* OPN state */ UINT32 slot_mask; /* active slot mask (performance hack) */ + UINT32 ssg_mask; /* active ssg mask (performance hack) */ } YM2612; #endif @@ -148,9 +166,9 @@ typedef struct extern YM2612 ym2612; #endif -void YM2612Init_(int baseclock, int rate); +void YM2612Init_(int baseclock, int rate, int flags); void YM2612ResetChip_(void); -int YM2612UpdateOne_(int *buffer, int length, int stereo, int is_buf_empty); +int YM2612UpdateOne_(s32 *buffer, int length, int stereo, int is_buf_empty); int YM2612Write_(unsigned int a, unsigned int v); //unsigned char YM2612Read_(void); @@ -159,9 +177,10 @@ int YM2612PicoTick_(int n); void YM2612PicoStateLoad_(void); void *YM2612GetRegs(void); -void YM2612PicoStateSave2(int tat, int tbt); -int YM2612PicoStateLoad2(int *tat, int *tbt); +void YM2612PicoStateSave2(int tat, int tbt, int busy); +int YM2612PicoStateLoad2(int *tat, int *tbt, int *busy); +/* NB must be macros for compiling GP2X 940 code */ #ifndef __GP2X__ #define YM2612Init YM2612Init_ #define YM2612ResetChip YM2612ResetChip_ @@ -169,23 +188,15 @@ int YM2612PicoStateLoad2(int *tat, int *tbt); #define YM2612PicoStateLoad YM2612PicoStateLoad_ #else /* GP2X specific */ -#include "../../platform/gp2x/940ctl.h" -extern int PicoOpt; -#define YM2612Init(baseclock,rate) { \ - if (PicoOpt&0x200) YM2612Init_940(baseclock, rate); \ - else YM2612Init_(baseclock, rate); \ -} -#define YM2612ResetChip() { \ - if (PicoOpt&0x200) YM2612ResetChip_940(); \ - else YM2612ResetChip_(); \ -} -#define 
YM2612UpdateOne(buffer,length,stereo,is_buf_empty) \ - (PicoOpt&0x200) ? YM2612UpdateOne_940(buffer, length, stereo, is_buf_empty) : \ - YM2612UpdateOne_(buffer, length, stereo, is_buf_empty); -#define YM2612PicoStateLoad() { \ - if (PicoOpt&0x200) YM2612PicoStateLoad_940(); \ - else YM2612PicoStateLoad_(); \ -} +#include +#define YM2612Init(baseclock, rate, flags) \ + (PicoIn.opt & POPT_EXT_FM ? YM2612Init_940 : YM2612Init_)(baseclock, rate, flags) +#define YM2612ResetChip() \ + (PicoIn.opt & POPT_EXT_FM ? YM2612ResetChip_940 : YM2612ResetChip_)() +#define YM2612PicoStateLoad() \ + (PicoIn.opt & POPT_EXT_FM ? YM2612PicoStateLoad_940 : YM2612PicoStateLoad_)() +#define YM2612UpdateOne(buffer, length, stereo, isempty) \ + (PicoIn.opt & POPT_EXT_FM ? YM2612UpdateOne_940 : YM2612UpdateOne_)(buffer, length, stereo, isempty) #endif /* __GP2X__ */ diff --git a/pico/sound/ym2612_arm.s b/pico/sound/ym2612_arm.S similarity index 53% rename from pico/sound/ym2612_arm.s rename to pico/sound/ym2612_arm.S index 7db31220..a81f34e8 100644 --- a/pico/sound/ym2612_arm.s +++ b/pico/sound/ym2612_arm.S @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2006 + * (C) irixxxx, 2020-2023 added SSG-EG and simple output rate interpolation * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
@@ -12,11 +13,16 @@ @ vim:filetype=armasm +#include + +@ very simple YM2612 output rate to sample rate adaption (~500k cycles @44100) +#define SSG_EG + .equiv SLOT1, 0 .equiv SLOT2, 2 .equiv SLOT3, 1 .equiv SLOT4, 3 -.equiv SLOT_STRUCT_SIZE, 0x30 +.equiv SLOT_STRUCT_SIZE, 0x38 .equiv TL_TAB_LEN, 0x1A00 @@ -26,107 +32,88 @@ .equiv EG_REL, 1 .equiv EG_OFF, 0 -.equiv EG_SH, 16 @ 16.16 fixed point (envelope generator timing) -.equiv EG_TIMER_OVERFLOW, (3*(1<= (INT32) SLOT->sl ) - movge r3, #EG_SUS strgeb r3, [r5,#0x17] @ state - b 4f + b 10f + +4: @ EG_ATT + subs r3, r3, #1 @ eg_inc_val_shift - 1 + mvnpl r2, r0 + movpl r2, r2, lsl r3 + addpl r0, r0, r2, asr #4 + cmp r0, #0 @ if (volume <= MIN_ATT_INDEX) + bgt 10f + ldr r2, [r5,#0x1c] + mov r0, #0 + cmp r2, #0 + movne r3, #EG_DEC + moveq r3, #EG_SUS + strb r3, [r5,#0x17] @ state + b 10f 2: @ EG_SUS - ldr r2, [r5,#0x28] @ eg_pack_d2r (1ci) - mov r0, r2, lsr #24 - mov r3, r3, lsl r0 - sub r3, r3, #1 - tst r1, r3 - bne 5f @ do smth for tl problem (set on init?) - mov r3, r1, lsr r0 - ldrh r0, [r5,#0x1a] @ volume - and r3, r3, #7 - add r3, r3, r3, lsl #1 - mov r3, r2, lsr r3 - and r3, r3, #7 @ shift for eg_inc calculation - mov r2, #1 - mov r3, r2, lsl r3 - add r0, r0, r3, asr #1 mov r2, #1024 sub r2, r2, #1 @ r2 = MAX_ATT_INDEX cmp r0, r2 @ if ( volume >= MAX_ATT_INDEX ) movge r0, r2 - b 4f + b 10f -3: @ EG_REL - ldr r2, [r5,#0x2c] @ eg_pack_rr (1ci) - mov r0, r2, lsr #24 - mov r3, r3, lsl r0 - sub r3, r3, #1 - tst r1, r3 - bne 5f @ do smth for tl problem (set on init?) 
- mov r3, r1, lsr r0 - ldrh r0, [r5,#0x1a] @ volume - and r3, r3, #7 - add r3, r3, r3, lsl #1 - mov r3, r2, lsr r3 - and r3, r3, #7 @ shift for eg_inc calculation - mov r2, #1 - mov r3, r2, lsl r3 - add r0, r0, r3, asr #1 +1: @ EG_REL mov r2, #1024 sub r2, r2, #1 @ r2 = MAX_ATT_INDEX cmp r0, r2 @ if ( volume >= MAX_ATT_INDEX ) @@ -134,32 +121,144 @@ movge r3, #EG_OFF strgeb r3, [r5,#0x17] @ state -4: +10: @ finish ldrh r3, [r5,#0x18] @ tl strh r0, [r5,#0x1a] @ volume -.if \slot == SLOT1 - mov r6, r6, lsr #16 - add r0, r0, r3 - orr r6, r0, r6, lsl #16 -.elseif \slot == SLOT2 - mov r6, r6, lsl #16 - add r0, r0, r3 - mov r0, r0, lsl #16 - orr r6, r0, r6, lsr #16 -.elseif \slot == SLOT3 - mov r7, r7, lsr #16 - add r0, r0, r3 - orr r7, r0, r7, lsl #16 -.elseif \slot == SLOT4 - mov r7, r7, lsl #16 - add r0, r0, r3 - mov r0, r0, lsl #16 - orr r7, r0, r7, lsr #16 -.endif +#if defined(SSG_EG) + b 11f -5: +9: @ SSG-EG mode + ldrh r0, [r5,#0x1a] @ volume, unsigned (0-1023) + cmp r2, #4 @ EG_ATT + beq 4f + + cmp r0, #0x200 @ if ( volume < 0x200 ) + movlt r0, #1 + movlt r3, r0, lsl r3 + ldrlth r0, [r5,#0x1a] @ volume, unsigned (0-1023) + movlt r3, r3, lsr #1 @ eg_inc_val + addlt r0, r0, r3, lsl #2 @ ...*4 + + cmp r2, #2 + blt 1f @ EG_REL + beq 10f @ EG_SUS - nothing more to do + +3: @ EG_DEC + ldr r2, [r5,#0x1c] @ sl (can be 16bit?) 
+ mov r3, #EG_SUS + cmp r0, r2 @ if ( volume >= (INT32) SLOT->sl ) + strgeb r3, [r5,#0x17] @ state + b 10f + +4: @ EG_ATT + subs r3, r3, #1 @ eg_inc_val_shift - 1 + mvnpl r2, r0 + movpl r2, r2, lsl r3 + addpl r0, r0, r2, asr #4 + cmp r0, #0 @ if (volume <= MIN_ATT_INDEX) + bgt 10f + ldr r2, [r5,#0x1c] @ sl + mov r0, #0 + cmp r2, #0 + movne r3, #EG_DEC + moveq r3, #EG_SUS + strb r3, [r5,#0x17] @ state + b 10f + +1: @ EG_REL + cmp r0, #0x200 @ if ( volume >= 0x200 ) + movge r0, #1024 + subge r0, r0, #1 + movge r3, #EG_OFF + strgeb r3, [r5,#0x17] @ state + ldrh r3, [r5,#0x18] @ tl + b 11f + +10: @ finish + ldrb r2, [r5,#0x30] @ ssg + ldrh r3, [r5,#0x18] @ tl + strh r0, [r5,#0x1a] @ volume + recalc_volume_ssg r0 r2 + +11: + add r0, r0, r3 @ volume += tl + strh r0, [r5,#0x34] @ vol_out +#endif + +0: @ EG_OFF .endm +#if defined(SSG_EG) +@ r5=slot, trashes: r0,r2,r3 +.macro update_ssg_eg + ldrh r0, [r5,#0x30] @ ssg+ssgn + ldrb r2, [r5,#0x17] @ state + ldrh r3, [r5,#0x1a] @ volume + tst r0, #0x08 @ ssg enabled && + beq 10f + cmp r2, #EG_REL+1 @ state > EG_REL && + cmpge r3, #0x200 @ volume >= 0x200? + blt 10f + + tst r0, #0x01 + beq 1f + + tst r0, #0x02 + eorne r0, r0, r0, lsr #8 @ ssg ^= ssgn ^ 4 + eorne r0, r0, #0x4 + orrne r0, r0, #0x400 @ ssgn = 4 + strneh r0, [r5,#0x30] + + tst r0, #0x4 @ if ( !(ssg&0x04) ) + bne 9f + cmp r2, #EG_ATT @ if ( state != EG_ATT ) + movne r3, #0x400 + subne r3, r3, #1 + strneh r3, [r5,#0x1a] @ volume = MAX_ATT + b 9f + +1: tst r0, #0x02 + eorne r0, r0, #0x4 @ ssg ^= 4 + eorne r0, r0, #0x400 @ ssgn ^= 4 + strneh r0, [r5,#0x30] + moveq r0, #0 + streq r0, [lr,#0x10] @ phase = 0 + + cmp r2, #EG_ATT @ if ( state != EG_ATT ) + beq 9f + + ldr r0, [r5,#0x1c] @ sl + mov r2, #EG_SUS @ state = sl==MIN_ATT ? 
EG_SUS:EG_DEC + cmp r0, #0 + + ldrh r0, [r5,#0x32] @ ar+ksr + movne r2, #EG_DEC + cmp r0, #32+62 @ if ( ar+ksr >= 32+62 ) + movge r3, #0 + strgeh r3, [r5,#0x1a] @ volume = MIN_ATT + bge 8f + + cmp r3, #0 + movgt r2, #EG_ATT +8: strb r2, [r5,#0x17] @ state +9: + ldrb r0, [r5,#0x30] @ ssg + ldrh r2, [r5,#0x18] @ tl + recalc_volume_ssg r3 r0 + add r3, r3, r2 @ volume += tl + strh r3, [r5,#0x34] @ vol_out +10: +.endm + +@ r5=slot +.macro recalc_volume_ssg vol ssg +@ and \ssg, \ssg, #0x0c + cmp \ssg, #0x0c @ if (~ssg&0x0c) + rsbge \vol, \vol, #0x200 @ volume = (0x200-volume) & MAX_ATT + movge \vol, \vol, lsl #22 + movge \vol, \vol, lsr #22 +.endm +#endif @ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt, r3=scratch .macro advance_lfo_m @@ -167,15 +266,16 @@ cmp r2, r1, lsr #LFO_SH beq 0f and r3, r2, #0x3f - cmp r2, #0x40 - rsbge r3, r3, #0x3f + tst r2, #0x40 + eoreq r3, r3, #0x3f bic r12,r12, #0xff000000 @ lfo_ampm &= 0xff orr r12,r12, r3, lsl #1+24 mov r2, r2, lsr #2 cmp r2, r1, lsr #LFO_SH+2 + andne r3, r2, #0x1f bicne r12,r12, #0xff0000 - orrne r12,r12, r2, lsl #16 + orrne r12,r12, r3, lsl #16 0: .endm @@ -187,14 +287,14 @@ tstne r12, #(1<<(\slot+8)) .if \slot == SLOT1 mov r1, r6, lsl #16 - mov r1, r1, lsr #17 + mov r1, r1, lsr #16 .elseif \slot == SLOT2 - mov r1, r6, lsr #17 + mov r1, r6, lsr #16 .elseif \slot == SLOT3 mov r1, r7, lsl #16 - mov r1, r1, lsr #17 + mov r1, r1, lsr #16 .elseif \slot == SLOT4 - mov r1, r7, lsr #17 + mov r1, r7, lsr #16 .endif andne r2, r12, #0xc0 movne r2, r2, lsr #6 @@ -203,19 +303,21 @@ .endm +@ \r=sin/result, r1=env, r3=ym_tl_tab .macro lookup_tl r tst \r, #0x100 eorne \r, \r, #0xff @ if (sin & 0x100) sin = 0xff - (sin&0xff); tst \r, #0x200 and \r, \r, #0xff - orr \r, \r, r1, lsl #8 + bic r1, r1, #1 + orr \r, \r, r1, lsl #7 mov \r, \r, lsl #1 ldrh \r, [r3, \r] @ 2ci if ne rsbne \r, \r, #0 .endm -@ lr=context, r12=pack (stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) +@ lr=context, 
r12=pack (stereo, ssg_enabled, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) @ r0-r2=scratch, r3=sin_tab, r5=scratch, r6-r7=vol_out[4], r10=op1_out .macro upd_algo0_m @@ -227,18 +329,17 @@ ldr r2, [lr, #0x18] ldr r0, [lr, #0x38] @ mem (signed) mov r2, r2, lsr #16 - add r0, r2, r0, lsr #1 + add r0, r2, r0, asr #1 lookup_tl r0 @ r0=c2 0: - @ SLOT4 make_eg_out SLOT4 cmp r1, #ENV_QUIET movcs r0, #0 bcs 1f ldr r2, [lr, #0x1c] - mov r0, r0, lsr #1 + mov r0, r0, asr #1 add r0, r0, r2, lsr #16 lookup_tl r0 @ r0=output smp @@ -249,7 +350,7 @@ movcs r2, #0 bcs 2f ldr r2, [lr, #0x14] @ 1ci - mov r5, r10, lsr #17 + mov r5, r10, asr #17 add r2, r5, r2, lsr #16 lookup_tl r2 @ r2=mem @@ -268,7 +369,7 @@ ldr r2, [lr, #0x18] ldr r0, [lr, #0x38] @ mem (signed) mov r2, r2, lsr #16 - add r0, r2, r0, lsr #1 + add r0, r2, r0, asr #1 lookup_tl r0 @ r0=c2 0: @@ -278,7 +379,7 @@ movcs r0, #0 bcs 1f ldr r2, [lr, #0x1c] - mov r0, r0, lsr #1 + mov r0, r0, asr #1 add r0, r0, r2, lsr #16 lookup_tl r0 @ r0=output smp @@ -308,7 +409,7 @@ ldr r2, [lr, #0x18] ldr r0, [lr, #0x38] @ mem (signed) mov r2, r2, lsr #16 - add r0, r2, r0, lsr #1 + add r0, r2, r0, asr #1 lookup_tl r0 @ r0=c2 0: @@ -320,7 +421,7 @@ movcs r0, #0 bcs 1f ldr r2, [lr, #0x1c] - mov r0, r0, lsr #1 + mov r0, r0, asr #1 add r0, r0, r2, lsr #16 lookup_tl r0 @ r0=output smp @@ -345,9 +446,9 @@ make_eg_out SLOT3 cmp r1, #ENV_QUIET ldr r2, [lr, #0x38] @ mem (for future) - movcs r0, r2 + mov r0, #0 bcs 0f - ldr r0, [lr, #0x18] @ 1ci + ldr r0, [lr, #0x18] @ phase3 mov r0, r0, lsr #16 lookup_tl r0 @ r0=c2 @@ -360,7 +461,7 @@ movcs r0, #0 bcs 1f ldr r2, [lr, #0x1c] - mov r0, r0, lsr #1 + mov r0, r0, asr #1 add r0, r0, r2, lsr #16 lookup_tl r0 @ r0=output smp @@ -370,13 +471,13 @@ cmp r1, #ENV_QUIET movcs r2, #0 bcs 2f - ldr r2, [lr, #0x14] - mov r5, r10, lsr #17 + ldr r2, [lr, #0x14] @ phase2 + mov r5, r10, asr #17 add r2, r5, r2, lsr #16 lookup_tl r2 @ r2=mem 2: - str r2, [lr, #0x38] @ mem + str r2, [lr, 
#0x38] @ mem .endm @@ -398,7 +499,7 @@ movcs r0, #0 bcs 1f ldr r2, [lr, #0x1c] - mov r0, r0, lsr #1 + mov r0, r0, asr #1 add r0, r0, r2, lsr #16 lookup_tl r0 @ r0=output smp @@ -408,7 +509,7 @@ cmp r1, #ENV_QUIET bcs 2f ldr r2, [lr, #0x14] - mov r5, r10, lsr #17 + mov r5, r10, asr #17 add r2, r5, r2, lsr #16 lookup_tl r2 add r0, r0, r2 @ add to smp @@ -427,7 +528,7 @@ ldr r2, [lr, #0x18] ldr r0, [lr, #0x38] @ mem (signed) mov r2, r2, lsr #16 - add r0, r2, r0, lsr #1 + add r0, r2, r0, asr #1 lookup_tl r0 @ r0=output smp 0: @@ -436,7 +537,7 @@ cmp r1, #ENV_QUIET bcs 1f ldr r2, [lr, #0x1c] - mov r5, r10, lsr #17 + mov r5, r10, asr #17 add r2, r5, r2, lsr #16 lookup_tl r2 add r0, r0, r2 @ add to smp @@ -446,7 +547,7 @@ cmp r1, #ENV_QUIET bcs 2f ldr r2, [lr, #0x14] - mov r5, r10, lsr #17 + mov r5, r10, asr #17 add r2, r5, r2, lsr #16 lookup_tl r2 add r0, r0, r2 @ add to smp @@ -483,7 +584,7 @@ cmp r1, #ENV_QUIET bcs 2f ldr r2, [lr, #0x14] - mov r5, r10, lsr #17 + mov r5, r10, asr #17 add r2, r5, r2, lsr #16 lookup_tl r2 add r0, r0, r2 @ add to smp @@ -541,9 +642,9 @@ movne r0, r0, asr #16 movne r0, r0, lsl r2 - ldr r2, [lr, #0x10] + ldr r2, [lr, #0x10] @ phase1 + add r0, r0, r2 mov r0, r0, lsr #16 - add r0, r0, r2, lsr #16 lookup_tl r0 mov r10,r10,lsl #16 @ ct->op1_out <<= 16; mov r0, r0, lsl #16 @@ -553,189 +654,8 @@ .endm -/* -.global update_eg_phase @ FM_SLOT *SLOT, UINT32 eg_cnt - -update_eg_phase: - stmfd sp!, {r5,r6} - mov r5, r0 @ slot - ldrh r3, [r5,#0x18] @ tl - ldrh r6, [r5,#0x1a] @ volume - add r6, r6, r3 - update_eg_phase_slot SLOT1 - mov r0, r6 - ldmfd sp!, {r5,r6} - bx lr -.pool - - -.global advance_lfo @ int lfo_ampm, UINT32 lfo_cnt_old, UINT32 lfo_cnt - -advance_lfo: - mov r12, r0, lsl #16 - advance_lfo_m - mov r0, r12, lsr #16 - bx lr -.pool - - -.global upd_algo0 @ chan_rend_context *c -upd_algo0: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - 
upd_algo0_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo1 @ chan_rend_context *c -upd_algo1: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo1_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo2 @ chan_rend_context *c -upd_algo2: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo2_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo3 @ chan_rend_context *c -upd_algo3: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo3_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo4 @ chan_rend_context *c -upd_algo4: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo4_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo5 @ chan_rend_context *c -upd_algo5: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo5_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo6 @ chan_rend_context *c -upd_algo6: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo6_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_algo7 @ chan_rend_context *c -upd_algo7: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab - ldmia lr, {r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_algo7_m - - ldmfd sp!, {r4-r10,pc} -.pool - - -.global upd_slot1 @ chan_rend_context *c -upd_slot1: - stmfd sp!, {r4-r10,lr} - mov lr, r0 - - ldr r3, =ym_sin_tab - ldr r5, =ym_tl_tab - ldmia lr, 
{r6-r7} - ldr r10, [lr, #0x54] - ldr r12, [lr, #0x4c] - - upd_slot1_m - str r10, [lr, #0x38] - - ldmfd sp!, {r4-r10,pc} -.pool -*/ - - -@ lr=context, r12=pack (stereo, lastchan, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) -@ r0-r2=scratch, r3=sin_tab/scratch, r4=(length<<8)|unused[4],was_update,algo[3], r5=tl_tab/slot, +@ lr=context, r12=pack (stereo, ssg_enabled, disabled, lfo_enabled | pan_r, pan_l, ams[2] | AMmasks[4] | FB[4] | lfo_ampm[16]) +@ r0-r2=scratch, r3=sin_tab/scratch, r4=(length<<8)|dac,upd_cnt[3],was_update,algo[3], r5=tl_tab/slot, @ r6-r7=vol_out[4], r8=eg_timer, r9=eg_timer_add[31:16], r10=op1_out, r11=buffer .global chan_render_loop @ chan_rend_context *ct, int *buffer, int length @@ -746,80 +666,135 @@ chan_render_loop: ldr r12, [lr, #0x4c] ldr r0, [lr, #0x50] mov r11, r1 - and r0, r0, #7 - orr r4, r4, r0 @ (length<<8)|algo - add r0, lr, #0x44 - ldmia r0, {r8,r9} @ eg_timer, eg_timer_add + and r0, r0, #0xf7 + orr r4, r4, r0 @ (length<<8)|dac,upd_cnt[2],unused,algo[3] + ldr r8, [lr, #0x44] @ eg_timer + ldr r9, [lr, #0x48] @ eg_timer_add ldr r10, [lr, #0x54] @ op1_out - ldmia lr, {r6,r7} @ load volumes - - tst r12, #8 @ lfo? 
- beq crl_loop - -crl_loop_lfo: - add r0, lr, #0x30 - ldmia r0, {r1,r2} - add r2, r2, r1 - str r2, [lr, #0x30] - @ r12=lfo_ampm[31:16], r1=lfo_cnt_old, r2=lfo_cnt - advance_lfo_m crl_loop: subs r4, r4, #0x100 bmi crl_loop_end - @ -- EG -- + ldr r5, [lr, #0x40] @ CH + mov r0, #0 add r8, r8, r9 - cmp r8, #EG_TIMER_OVERFLOW - bcc eg_done - add r0, lr, #0x3c - ldmia r0, {r1,r5} @ eg_cnt, CH -eg_loop: - sub r8, r8, #EG_TIMER_OVERFLOW - add r1, r1, #1 - @ SLOT1 (0) - @ r5=slot, r1=eg_cnt, trashes: r0,r2,r3 - update_eg_phase_slot SLOT1 - add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT2 (2) - update_eg_phase_slot SLOT2 - sub r5, r5, #SLOT_STRUCT_SIZE @ SLOT3 (1) - update_eg_phase_slot SLOT3 - add r5, r5, #SLOT_STRUCT_SIZE*2 @ SLOT4 (3) - update_eg_phase_slot SLOT4 + subs r8, r8, #(1< +#include -#include "../cpu/sh2/sh2.h" +#include #include "sound/ym2612.h" +#include "sound/emu2413/emu2413.h" #include "state.h" -// sn76496 +// sn76496 & ym2413 extern int *sn76496_regs; static arearw *areaRead; @@ -60,7 +61,7 @@ static void *open_save_file(const char *fname, int is_save) int len = strlen(fname); void *afile = NULL; - if (len > 3 && strcmp(fname + len - 3, ".gz") == 0) + if (len > 3 && strcasecmp(fname + len - 3, ".gz") == 0) { if ( (afile = gzopen(fname, is_save ? 
"wb" : "rb")) ) { set_cbs(1); @@ -78,54 +79,6 @@ static void *open_save_file(const char *fname, int is_save) return afile; } -// legacy savestate loading -#define SCANP(f, x) areaRead(&Pico.x, sizeof(Pico.x), 1, f) - -static int state_load_legacy(void *file) -{ - unsigned char head[32]; - unsigned char cpu[0x60]; - unsigned char cpu_z80[Z80_STATE_SIZE]; - void *ym2612_regs; - int ok; - - memset(&cpu,0,sizeof(cpu)); - memset(&cpu_z80,0,sizeof(cpu_z80)); - - memset(head, 0, sizeof(head)); - areaRead(head, sizeof(head), 1, file); - if (strcmp((char *)head, "Pico") != 0) - return -1; - - elprintf(EL_STATUS, "legacy savestate"); - - // Scan all the memory areas: - SCANP(file, ram); - SCANP(file, vram); - SCANP(file, zram); - SCANP(file, cram); - SCANP(file, vsram); - - // Pack, scan and unpack the cpu data: - areaRead(cpu, sizeof(cpu), 1, file); - SekUnpackCpu(cpu, 0); - - SCANP(file, m); - SCANP(file, video); - - ok = areaRead(cpu_z80, sizeof(cpu_z80), 1, file) == sizeof(cpu_z80); - // do not unpack if we fail to load z80 state - if (!ok) z80_reset(); - else z80_unpack(cpu_z80); - - ym2612_regs = YM2612GetRegs(); - areaRead(sn76496_regs, 28*4, 1, file); - areaRead(ym2612_regs, 0x200+4, 1, file); - ym2612_unpack_state(); - - return 0; -} - // --------------------------------------------------------------------------- typedef enum { @@ -148,10 +101,10 @@ typedef enum { CHUNK_BRAM, CHUNK_GA_REGS, CHUNK_PCM, - CHUNK_CDC, - CHUNK_CDD, // 20 - CHUNK_SCD, - CHUNK_RC, + CHUNK_CDC, // old + CHUNK_CDD, // 20 old + CHUNK_SCD, // old + CHUNK_RC, // old CHUNK_MISC_CD, // CHUNK_IOPORTS, // versions < 1.70 did not save that.. 
@@ -171,14 +124,21 @@ typedef enum { CHUNK_DRAM, CHUNK_32XPAL, CHUNK_32X_EVT, + //rename CHUNK_32X_FIRST = CHUNK_MSH2, CHUNK_32X_LAST = CHUNK_32X_EVT, // add new stuff here CHUNK_CD_EVT = 50, CHUNK_CD_GFX, + CHUNK_CD_CDC, + CHUNK_CD_CDD, + CHUNK_YM2413, + CHUNK_PICO_PCM, + CHUNK_PICO, // CHUNK_DEFAULT_COUNT, CHUNK_CARTHW_ = CHUNK_CARTHW, // 64 (defined in PicoInt) + } chunk_name_e; static const char * const chunk_names[CHUNK_DEFAULT_COUNT] = { @@ -223,11 +183,11 @@ static const char * const chunk_names[CHUNK_DEFAULT_COUNT] = { "SSH2 BIOS", // 35 "SDRAM", "DRAM", - "PAL", - "events", + "32X palette", + "32X events", }; -static int write_chunk(chunk_name_e name, int len, void *data, void *file) +static int write_chunk(unsigned char name, int len, void *data, void *file) { size_t bwritten = 0; bwritten += areaWrite(&name, 1, 1, file); @@ -237,64 +197,97 @@ static int write_chunk(chunk_name_e name, int len, void *data, void *file) return (bwritten == len + 4 + 1); } +#define CHUNK_LIMIT_W 18772 // sizeof(cdc) + #define CHECKED_WRITE(name,len,data) { \ if (PicoStateProgressCB && name < CHUNK_DEFAULT_COUNT && chunk_names[name]) { \ - strncpy(sbuff + 9, chunk_names[name], sizeof(sbuff) - 9); \ + strncpy(sbuff + 9, chunk_names[name], sizeof(sbuff)-1 - 9); \ + sbuff[sizeof(sbuff)-1] = '\0'; \ PicoStateProgressCB(sbuff); \ } \ - if (!write_chunk(name, len, data, file)) return 1; \ + if (data == buf2 && len > CHUNK_LIMIT_W) \ + goto out; \ + if (!write_chunk(name, len, data, file)) \ + goto out; \ } #define CHECKED_WRITE_BUFF(name,buff) { \ if (PicoStateProgressCB && name < CHUNK_DEFAULT_COUNT && chunk_names[name]) { \ - strncpy(sbuff + 9, chunk_names[name], sizeof(sbuff) - 9); \ + strncpy(sbuff + 9, chunk_names[name], sizeof(sbuff)-1 - 9); \ + sbuff[sizeof(sbuff)-1] = '\0'; \ PicoStateProgressCB(sbuff); \ } \ - if (!write_chunk(name, sizeof(buff), &buff, file)) return 1; \ + if (!write_chunk(name, sizeof(buff), &buff, file)) \ + goto out; \ } static int state_save(void 
*file) { char sbuff[32] = "Saving.. "; unsigned char buff[0x60], buff_z80[Z80_STATE_SIZE]; - void *ym2612_regs = YM2612GetRegs(); - int ver = 0x0170; // not really used.. + void *ym_regs = YM2612GetRegs(); + void *buf2 = NULL; + int ver = 0x0191; // not really used.. + int retval = -1; int len; + buf2 = malloc(CHUNK_LIMIT_W); + if (buf2 == NULL) + return -1; + areaWrite("PicoSEXT", 1, 8, file); areaWrite(&ver, 1, 4, file); - if (!(PicoAHW & PAHW_SMS)) { + if (!(PicoIn.AHW & PAHW_SMS)) { + // the patches can cause incompatible saves with no-idle + SekFinishIdleDet(); + memset(buff, 0, sizeof(buff)); SekPackCpu(buff, 0); CHECKED_WRITE_BUFF(CHUNK_M68K, buff); - CHECKED_WRITE_BUFF(CHUNK_RAM, Pico.ram); - CHECKED_WRITE_BUFF(CHUNK_VSRAM, Pico.vsram); - CHECKED_WRITE_BUFF(CHUNK_IOPORTS, Pico.ioports); - ym2612_pack_state(); - CHECKED_WRITE(CHUNK_FM, 0x200+4, ym2612_regs); + CHECKED_WRITE_BUFF(CHUNK_RAM, PicoMem.ram); + CHECKED_WRITE_BUFF(CHUNK_VSRAM, PicoMem.vsram); + CHECKED_WRITE_BUFF(CHUNK_IOPORTS, PicoMem.ioports); + if (PicoIn.AHW & PAHW_PICO) { + len = PicoPicoPCMSave(buf2, CHUNK_LIMIT_W); + CHECKED_WRITE(CHUNK_PICO_PCM, len, buf2); + CHECKED_WRITE(CHUNK_PICO, sizeof(PicoPicohw), &PicoPicohw); + } else { + ym2612_pack_state(); + ym_regs = YM2612GetRegs(); + CHECKED_WRITE(CHUNK_FM, 0x200+4, ym_regs); + } + + if (!(PicoIn.opt & POPT_DIS_IDLE_DET)) + SekInitIdleDet(); } else { CHECKED_WRITE_BUFF(CHUNK_SMS, Pico.ms); + ym_regs = YM2413GetRegs(); + CHECKED_WRITE(CHUNK_YM2413, 0x40+4, ym_regs); } - - CHECKED_WRITE_BUFF(CHUNK_VRAM, Pico.vram); - CHECKED_WRITE_BUFF(CHUNK_ZRAM, Pico.zram); - CHECKED_WRITE_BUFF(CHUNK_CRAM, Pico.cram); - CHECKED_WRITE_BUFF(CHUNK_MISC, Pico.m); - CHECKED_WRITE_BUFF(CHUNK_VIDEO, Pico.video); - - z80_pack(buff_z80); - CHECKED_WRITE_BUFF(CHUNK_Z80, buff_z80); CHECKED_WRITE(CHUNK_PSG, 28*4, sn76496_regs); - if (PicoAHW & PAHW_MCD) + if (!(PicoIn.AHW & PAHW_PICO)) { + z80_pack(buff_z80); + CHECKED_WRITE_BUFF(CHUNK_Z80, buff_z80); + 
CHECKED_WRITE_BUFF(CHUNK_ZRAM, PicoMem.zram); + } + + CHECKED_WRITE_BUFF(CHUNK_VRAM, PicoMem.vram); + CHECKED_WRITE_BUFF(CHUNK_CRAM, PicoMem.cram); + + CHECKED_WRITE_BUFF(CHUNK_MISC, Pico.m); + PicoVideoSave(); + CHECKED_WRITE_BUFF(CHUNK_VIDEO, Pico.video); + + if (PicoIn.AHW & PAHW_MCD) { memset(buff, 0, sizeof(buff)); SekPackCpu(buff, 1); if (Pico_mcd->s68k_regs[3] & 4) // 1M mode? wram_1M_to_2M(Pico_mcd->word_ram2M); - memcpy(&Pico_mcd->m.hint_vector, Pico_mcd->bios + 0x72, + memcpy(&Pico_mcd->m.hint_vector, Pico.rom + 0x72, sizeof(Pico_mcd->m.hint_vector)); CHECKED_WRITE_BUFF(CHUNK_S68K, buff); @@ -304,22 +297,24 @@ static int state_save(void *file) CHECKED_WRITE_BUFF(CHUNK_BRAM, Pico_mcd->bram); CHECKED_WRITE_BUFF(CHUNK_GA_REGS, Pico_mcd->s68k_regs); // GA regs, not CPU regs CHECKED_WRITE_BUFF(CHUNK_PCM, Pico_mcd->pcm); - CHECKED_WRITE_BUFF(CHUNK_CDD, Pico_mcd->cdd); - CHECKED_WRITE_BUFF(CHUNK_CDC, Pico_mcd->cdc); - CHECKED_WRITE_BUFF(CHUNK_SCD, Pico_mcd->scd); CHECKED_WRITE_BUFF(CHUNK_MISC_CD, Pico_mcd->m); memset(buff, 0, 0x40); memcpy(buff, pcd_event_times, sizeof(pcd_event_times)); CHECKED_WRITE(CHUNK_CD_EVT, 0x40, buff); - len = gfx_context_save(buff); - CHECKED_WRITE(CHUNK_CD_GFX, len, buff); + + len = gfx_context_save(buf2); + CHECKED_WRITE(CHUNK_CD_GFX, len, buf2); + len = cdc_context_save(buf2); + CHECKED_WRITE(CHUNK_CD_CDC, len, buf2); + len = cdd_context_save(buf2); + CHECKED_WRITE(CHUNK_CD_CDD, len, buf2); if (Pico_mcd->s68k_regs[3] & 4) // convert back wram_2M_to_1M(Pico_mcd->word_ram2M); } #ifndef NO_32X - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) { unsigned char cpubuff[SH2_STATE_SIZE]; @@ -358,7 +353,13 @@ static int state_save(void *file) CHECKED_WRITE(chwc->chunk, chwc->size, chwc->ptr); } - return 0; + CHECKED_WRITE(0, 0, NULL); + retval = 0; + +out: + if (buf2 != NULL) + free(buf2); + return retval; } static int g_read_offs = 0; @@ -366,7 +367,7 @@ static int g_read_offs = 0; #define R_ERROR_RETURN(error) \ { \ 
elprintf(EL_STATUS, "load_state @ %x: " error, g_read_offs); \ - return 1; \ + goto out; \ } // when is eof really set? @@ -374,7 +375,6 @@ static int g_read_offs = 0; if (areaRead(data, 1, len, file) != len) { \ if (len == 1 && areaEof(file)) goto readend; \ R_ERROR_RETURN("areaRead: premature EOF\n"); \ - return 1; \ } \ g_read_offs += len; \ } @@ -390,14 +390,24 @@ static int g_read_offs = 0; #define CHECKED_READ_BUFF(buff) CHECKED_READ2(sizeof(buff), &buff); +#define CHUNK_LIMIT_R 0x10960 // sizeof(old_cdc) + +#define CHECKED_READ_LIM(data) { \ + if (len > CHUNK_LIMIT_R) \ + R_ERROR_RETURN("chunk size over limit."); \ + CHECKED_READ(len, data); \ +} + static int state_load(void *file) { unsigned char buff_m68k[0x60], buff_s68k[0x60]; unsigned char buff_z80[Z80_STATE_SIZE]; unsigned char buff_sh2[SH2_STATE_SIZE]; - unsigned char buff[0x40]; + unsigned char *buf = NULL; unsigned char chunk; - void *ym2612_regs; + void *ym_regs; + int len_check; + int retval = -1; char header[8]; int ver, len; @@ -405,6 +415,10 @@ static int state_load(void *file) memset(buff_s68k, 0, sizeof(buff_s68k)); memset(buff_z80, 0, sizeof(buff_z80)); + buf = malloc(CHUNK_LIMIT_R); + if (buf == NULL) + return -1; + g_read_offs = 0; CHECKED_READ(8, header); if (strncmp(header, "PicoSMCD", 8) && strncmp(header, "PicoSEXT", 8)) @@ -416,12 +430,13 @@ static int state_load(void *file) while (!areaEof(file)) { + len_check = 0; CHECKED_READ(1, &chunk); CHECKED_READ(4, &len); if (len < 0 || len > 1024*512) R_ERROR_RETURN("bad length"); - if (CHUNK_S68K <= chunk && chunk <= CHUNK_MISC_CD && !(PicoAHW & PAHW_MCD)) + if (CHUNK_S68K <= chunk && chunk <= CHUNK_MISC_CD && !(PicoIn.AHW & PAHW_MCD)) R_ERROR_RETURN("cd chunk in non CD state?"); - if (CHUNK_32X_FIRST <= chunk && chunk <= CHUNK_32X_LAST && !(PicoAHW & PAHW_32X)) + if (CHUNK_32X_FIRST <= chunk && chunk <= CHUNK_32X_LAST && !(PicoIn.AHW & PAHW_32X)) Pico32xStartup(); switch (chunk) @@ -434,21 +449,38 @@ static int state_load(void *file) 
CHECKED_READ_BUFF(buff_z80); break; - case CHUNK_RAM: CHECKED_READ_BUFF(Pico.ram); break; - case CHUNK_VRAM: CHECKED_READ_BUFF(Pico.vram); break; - case CHUNK_ZRAM: CHECKED_READ_BUFF(Pico.zram); break; - case CHUNK_CRAM: CHECKED_READ_BUFF(Pico.cram); break; - case CHUNK_VSRAM: CHECKED_READ_BUFF(Pico.vsram); break; + case CHUNK_RAM: CHECKED_READ_BUFF(PicoMem.ram); break; + case CHUNK_VRAM: CHECKED_READ_BUFF(PicoMem.vram); break; + case CHUNK_ZRAM: CHECKED_READ_BUFF(PicoMem.zram); break; + case CHUNK_CRAM: CHECKED_READ_BUFF(PicoMem.cram); break; + case CHUNK_VSRAM: CHECKED_READ_BUFF(PicoMem.vsram); break; case CHUNK_MISC: CHECKED_READ_BUFF(Pico.m); break; - case CHUNK_VIDEO: CHECKED_READ_BUFF(Pico.video); break; - case CHUNK_IOPORTS: CHECKED_READ_BUFF(Pico.ioports); break; + case CHUNK_VIDEO: + CHECKED_READ_BUFF(Pico.video); + PicoVideoLoad(); + break; + + case CHUNK_IOPORTS: CHECKED_READ_BUFF(PicoMem.ioports); break; case CHUNK_PSG: CHECKED_READ2(28*4, sn76496_regs); break; + case CHUNK_YM2413: + ym_regs = YM2413GetRegs(); + CHECKED_READ2(0x40+4, ym_regs); + YM2413UnpackState(); + break; case CHUNK_FM: - ym2612_regs = YM2612GetRegs(); - CHECKED_READ2(0x200+4, ym2612_regs); + ym_regs = YM2612GetRegs(); + CHECKED_READ2(0x200+4, ym_regs); ym2612_unpack_state(); break; + case CHUNK_PICO_PCM: + CHECKED_READ(len, buf); + PicoPicoPCMLoad(buf, len); + break; + case CHUNK_PICO: + CHECKED_READ_BUFF(PicoPicohw); + break; + case CHUNK_SMS: CHECKED_READ_BUFF(Pico.ms); break; @@ -464,19 +496,37 @@ static int state_load(void *file) case CHUNK_BRAM: CHECKED_READ_BUFF(Pico_mcd->bram); break; case CHUNK_GA_REGS: CHECKED_READ_BUFF(Pico_mcd->s68k_regs); break; case CHUNK_PCM: CHECKED_READ_BUFF(Pico_mcd->pcm); break; - case CHUNK_CDD: CHECKED_READ_BUFF(Pico_mcd->cdd); break; - case CHUNK_CDC: CHECKED_READ_BUFF(Pico_mcd->cdc); break; - case CHUNK_SCD: CHECKED_READ_BUFF(Pico_mcd->scd); break; case CHUNK_MISC_CD: CHECKED_READ_BUFF(Pico_mcd->m); break; case CHUNK_CD_EVT: - 
CHECKED_READ_BUFF(buff); - memcpy(pcd_event_times, buff, sizeof(pcd_event_times)); + CHECKED_READ2(0x40, buf); + memcpy(pcd_event_times, buf, sizeof(pcd_event_times)); break; case CHUNK_CD_GFX: - CHECKED_READ2(0x18, buff); - gfx_context_load(buff); + CHECKED_READ_LIM(buf); + len_check = gfx_context_load(buf); + break; + + case CHUNK_CD_CDC: + CHECKED_READ_LIM(buf); + len_check = cdc_context_load(buf); + break; + + case CHUNK_CD_CDD: + CHECKED_READ_LIM(buf); + len_check = cdd_context_load(buf); + break; + + // old, to be removed: + case CHUNK_CDC: + CHECKED_READ_LIM(buf); + cdc_context_load_old(buf); + break; + + case CHUNK_SCD: + CHECKED_READ_LIM(buf); + cdd_context_load_old(buf); break; // 32x stuff @@ -504,11 +554,13 @@ static int state_load(void *file) case CHUNK_32XPAL: CHECKED_READ_BUFF(Pico32xMem->pal); break; case CHUNK_32X_EVT: - CHECKED_READ_BUFF(buff); - memcpy(p32x_event_times, buff, sizeof(p32x_event_times)); + CHECKED_READ2(0x40, buf); + memcpy(p32x_event_times, buf, sizeof(p32x_event_times)); break; #endif default: + if (!len && !chunk) + goto readend; if (carthw_chunks != NULL) { carthw_state_chunk *chwc; @@ -523,38 +575,46 @@ static int state_load(void *file) areaSeek(file, len, SEEK_CUR); break; } -breakswitch:; +breakswitch: + if (len_check != 0 && len_check != len) + elprintf(EL_STATUS, "load_state: chunk %d has bad len %d/%d", + len, len_check); } readend: - if (PicoAHW & PAHW_SMS) + if (PicoIn.AHW & PAHW_SMS) PicoStateLoadedMS(); - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) Pico32xStateLoaded(1); + if (PicoLoadStateHook != NULL) + PicoLoadStateHook(); + // must unpack 68k and z80 after banks are set up - if (!(PicoAHW & PAHW_SMS)) + if (!(PicoIn.AHW & PAHW_SMS)) SekUnpackCpu(buff_m68k, 0); - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) SekUnpackCpu(buff_s68k, 1); z80_unpack(buff_z80); - // due to dep from 68k cycles.. 
- SekCycleAim = SekCycleCnt; - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) Pico32xStateLoaded(0); - if (PicoAHW & PAHW_MCD) - { - SekCycleAimS68k = SekCycleCntS68k; + if (PicoIn.AHW & PAHW_MCD) pcd_state_loaded(); - - if (!(Pico_mcd->s68k_regs[0x36] & 1) && (Pico_mcd->scd.Status_CDC & 1)) - cdda_start_play(); + if (!(PicoIn.AHW & PAHW_SMS)) { + Pico.video.status &= ~(SR_VB | SR_F); + Pico.video.status |= ((Pico.video.reg[1] >> 3) ^ SR_VB) & SR_VB; + Pico.video.status |= (Pico.video.pending_ints << 2) & SR_F; } - return 0; + Pico.m.dirtyPal = 1; + retval = 0; + +out: + free(buf); + return retval; } static int state_load_gfx(void *file) @@ -562,8 +622,8 @@ static int state_load_gfx(void *file) int ver, len, found = 0, to_find = 4; char buff[8]; - if (PicoAHW & PAHW_32X) - to_find += 2; + if (PicoIn.AHW & PAHW_32X) + to_find += 3; g_read_offs = 0; CHECKED_READ(8, buff); @@ -576,30 +636,33 @@ static int state_load_gfx(void *file) CHECKED_READ(1, buff); CHECKED_READ(4, &len); if (len < 0 || len > 1024*512) R_ERROR_RETURN("bad length"); - if (buff[0] > CHUNK_FM && buff[0] <= CHUNK_MISC_CD && !(PicoAHW & PAHW_MCD)) + if (buff[0] > CHUNK_FM && buff[0] <= CHUNK_MISC_CD && !(PicoIn.AHW & PAHW_MCD)) R_ERROR_RETURN("cd chunk in non CD state?"); switch (buff[0]) { - case CHUNK_VRAM: CHECKED_READ_BUFF(Pico.vram); found++; break; - case CHUNK_CRAM: CHECKED_READ_BUFF(Pico.cram); found++; break; - case CHUNK_VSRAM: CHECKED_READ_BUFF(Pico.vsram); found++; break; + case CHUNK_VRAM: CHECKED_READ_BUFF(PicoMem.vram); found++; break; + case CHUNK_CRAM: CHECKED_READ_BUFF(PicoMem.cram); found++; break; + case CHUNK_VSRAM: CHECKED_READ_BUFF(PicoMem.vsram); found++; break; case CHUNK_VIDEO: CHECKED_READ_BUFF(Pico.video); found++; break; #ifndef NO_32X case CHUNK_DRAM: if (Pico32xMem != NULL) CHECKED_READ_BUFF(Pico32xMem->dram); + found++; break; case CHUNK_32XPAL: if (Pico32xMem != NULL) CHECKED_READ_BUFF(Pico32xMem->pal); + found++; Pico32x.dirty_pal = 1; break; case CHUNK_32XSYS: 
CHECKED_READ_BUFF(Pico32x); + found++; break; #endif default: @@ -608,6 +671,7 @@ static int state_load_gfx(void *file) } } +out: readend: return 0; } @@ -618,17 +682,8 @@ static int pico_state_internal(void *afile, int is_save) if (is_save) ret = state_save(afile); - else { + else ret = state_load(afile); - if (ret != 0) { - areaSeek(afile, 0, SEEK_SET); - ret = state_load_legacy(afile); - } - - if (PicoLoadStateHook != NULL) - PicoLoadStateHook(); - Pico.m.dirtyPal = 1; - } return ret; } @@ -672,14 +727,17 @@ int PicoStateLoadGfx(const char *fname) if (ret != 0) { // assume legacy areaSeek(afile, 0x10020, SEEK_SET); // skip header and RAM - areaRead(Pico.vram, 1, sizeof(Pico.vram), afile); + areaRead(PicoMem.vram, 1, sizeof(PicoMem.vram), afile); areaSeek(afile, 0x2000, SEEK_CUR); - areaRead(Pico.cram, 1, sizeof(Pico.cram), afile); - areaRead(Pico.vsram, 1, sizeof(Pico.vsram), afile); + areaRead(PicoMem.cram, 1, sizeof(PicoMem.cram), afile); + areaRead(PicoMem.vsram, 1, sizeof(PicoMem.vsram), afile); areaSeek(afile, 0x221a0, SEEK_SET); areaRead(&Pico.video, 1, sizeof(Pico.video), afile); } areaClose(afile); + + PicoVideoCacheSAT(1); + Pico.est.rendstatus = -1; return 0; } @@ -689,6 +747,7 @@ struct PicoTmp unsigned short vram[0x8000]; unsigned short cram[0x40]; unsigned short vsram[0x40]; + unsigned int satcache[2*0x80]; //struct PicoMisc m; struct PicoVideo video; @@ -708,13 +767,14 @@ void *PicoTmpStateSave(void) if (t == NULL) return NULL; - memcpy(t->vram, Pico.vram, sizeof(Pico.vram)); - memcpy(t->cram, Pico.cram, sizeof(Pico.cram)); - memcpy(t->vsram, Pico.vsram, sizeof(Pico.vsram)); + memcpy(t->vram, PicoMem.vram, sizeof(PicoMem.vram)); + memcpy(t->cram, PicoMem.cram, sizeof(PicoMem.cram)); + memcpy(t->vsram, PicoMem.vsram, sizeof(PicoMem.vsram)); + memcpy(t->satcache, VdpSATCache, sizeof(VdpSATCache)); memcpy(&t->video, &Pico.video, sizeof(Pico.video)); #ifndef NO_32X - if (PicoAHW & PAHW_32X) { + if (PicoIn.AHW & PAHW_32X) { memcpy(&t->t32x.p32x, 
&Pico32x, sizeof(Pico32x)); memcpy(t->t32x.dram, Pico32xMem->dram, sizeof(Pico32xMem->dram)); memcpy(t->t32x.pal, Pico32xMem->pal, sizeof(Pico32xMem->pal)); @@ -730,20 +790,23 @@ void PicoTmpStateRestore(void *data) if (t == NULL) return; - memcpy(Pico.vram, t->vram, sizeof(Pico.vram)); - memcpy(Pico.cram, t->cram, sizeof(Pico.cram)); - memcpy(Pico.vsram, t->vsram, sizeof(Pico.vsram)); + memcpy(PicoMem.vram, t->vram, sizeof(PicoMem.vram)); + memcpy(PicoMem.cram, t->cram, sizeof(PicoMem.cram)); + memcpy(PicoMem.vsram, t->vsram, sizeof(PicoMem.vsram)); + memcpy(VdpSATCache, t->satcache, sizeof(VdpSATCache)); memcpy(&Pico.video, &t->video, sizeof(Pico.video)); Pico.m.dirtyPal = 1; + PicoVideoCacheSAT(0); #ifndef NO_32X - if (PicoAHW & PAHW_32X) { + if (PicoIn.AHW & PAHW_32X) { memcpy(&Pico32x, &t->t32x.p32x, sizeof(Pico32x)); memcpy(Pico32xMem->dram, t->t32x.dram, sizeof(Pico32xMem->dram)); memcpy(Pico32xMem->pal, t->t32x.pal, sizeof(Pico32xMem->pal)); Pico32x.dirty_pal = 1; } #endif + free(t); } // vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/videoport.c b/pico/videoport.c index 6c876aea..9c35162d 100644 --- a/pico/videoport.c +++ b/pico/videoport.c @@ -2,69 +2,520 @@ * PicoDrive * (c) Copyright Dave, 2004 * (C) notaz, 2006-2009 + * (C) irixxxx, 2020-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
*/ #include "pico_int.h" +#define NEED_DMA_SOURCE +#include "memory.h" -int line_base_cycles; -extern const unsigned char hcounts_32[]; -extern const unsigned char hcounts_40[]; -#ifndef UTYPES_DEFINED -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -#define UTYPES_DEFINED -#endif +enum { clkdiv = 2 }; // CPU clock granularity: one of 1,2,4,8 -int (*PicoDmaHook)(unsigned int source, int len, unsigned short **srcp, unsigned short **limitp) = NULL; +// VDP Slot timing, taken from http://gendev.spritesmind.net/ +// forum/viewtopic.php?f=22&t=851&sid=d5701a71396ee7f700c74fb7cd85cb09 +// http://plutiedev.com/mirror/kabuto-hardware-notes +// Thank you very much for the great work, Nemesis, Kabuto! + +// Slot clock is sysclock/20 for h32 and sysclock/16 for h40. +// One scanline is 63.7us/64.3us (ntsc/pal) long which is ~488.57 68k cycles. +// Approximate by 488 for VDP. +// 1 slot is 20/7 = 2.857 68k cycles in h32, and 16/7 = 2.286 in h40. That's +// 171 slots in h32, and ~214 (really 193 plus 17 prolonged in HSYNC) in h40. +enum { slcpu = 488 }; + +// VDP has a slot counter running from 0x00 to 0xff every scanline, but it has +// a gap depending on the video mode. The slot in which a horizontal interrupt +// is generated also depends on the video mode. +// NB Kabuto says gapend40 is 0xe4. That's technically correct, since slots 0xb6 +// and 0xe4 are only half slots. Ignore 0xe4 here and make 0xb6 a full slot. +enum { hint32 = 0x85, gapstart32 = 0x94, gapend32 = 0xe9}; +enum { hint40 = 0xa5, gapstart40 = 0xb7, gapend40 = 0xe5}; + +// Basic timing in h32: 38 slots (~108.5 cycles) from hint to VDP output start +// at slot 0x00. vint takes place on the 1st VBLANK line in slot 0x01 (~111.5). +// Rendering takes 128 slots (~365.5), and right border starts at slot 0x80 +// (~474 cycles). hint occurs after 5 slots into the border (~488.5 cycles). 
+ +// The horizontal sync period (HBLANK) is 30/37 slots (h32/h40): +// h32: 4 slots front porch (1.49us), 13 HSYNC (4.84us), 13 back porch (4.84us) +// h40: 5 slots front porch (1.49us), 16 HSYNC (4.77us), 16 back porch (4.77us) +// HBLANK starts at slot 0x93/0xb4 and ends in the middle of slot 0x05/0x06, +// NB VDP slows down the h40 clock to h32 during HSYNC for 17 slots to get the +// right sync timing. Ignored in the slot calculation, but hblen40 is correct. +enum { hboff32 = 0x93-hint32, hblen32 = 0xf8-(gapend32-gapstart32)-hint32};//30 +enum { hboff40 = 0xb4-hint40, hblen40 = 0xf8-(gapend40-gapstart40)-hint40};//37 + +// number of slots in a scanline +#define slots32 (0x100-(gapend32-gapstart32)) // 171 +#define slots40 (0x100-(gapend40-gapstart40)) // 210 + +// In blanked display, all slots but the refresh slots are usable for transfers, +// in active display only 16(h32) / 18(h40) slots can be used. + +// dma and refresh slots for active display, 16 for H32 +static u8 dmaslots32[] = + { 145,243, 2,10,18, 34,42,50, 66,74,82, 98,106,114, 129,130 }; +static u8 refslots32[] = + { 250, 26, 58, 90, 122 }; +// dma and refresh slots for active display, 18 for H40 +static u8 dmaslots40[] = + { 232, 2,10,18, 34,42,50, 66,74,82, 98,106,114, 130,138,146, 161,162 }; +static u8 refslots40[] = + { 250, 26, 58, 90, 122, 154 }; + +// table sizes +enum { cycsz = slcpu/clkdiv }; +enum { sl32blsz=slots32-sizeof(refslots32)+1, sl32acsz=sizeof(dmaslots32)+1 }; +enum { sl40blsz=slots40-sizeof(refslots40)+1, sl40acsz=sizeof(dmaslots40)+1 }; + +// Tables must be considerably larger than one scanline, since 68k emulation +// isn't stopping in the middle of an operation. If the last op is a 32 bit +// VDP access 2 slots may need to be taken from the next scanline, which can be +// more than 100 CPU cycles. For safety just cover 2 scanlines. + +// table for hvcounter mapping. 
check: Sonic 3D Blast bonus, Cannon Fodder, +// Chase HQ II, 3 Ninjas kick back, Road Rash 3, Skitchin', Wheel of Fortune +static u8 hcounts_32[2*cycsz], hcounts_40[2*cycsz]; +// tables mapping cycles to slots +static u16 vdpcyc2sl_32_bl[2*cycsz],vdpcyc2sl_40_bl[2*cycsz]; +static u16 vdpcyc2sl_32_ac[2*cycsz],vdpcyc2sl_40_ac[2*cycsz]; +// tables mapping slots to cycles +// NB the sl2cyc tables must cover all slots present in the cyc2sl tables. +static u16 vdpsl2cyc_32_bl[2*sl32blsz],vdpsl2cyc_40_bl[2*sl40blsz]; +static u16 vdpsl2cyc_32_ac[2*sl32acsz],vdpsl2cyc_40_ac[2*sl40acsz]; + + +// calculate timing tables for one mode (H32 or H40) +// NB tables aligned to HINT, since the main loop uses HINT as synchronization +#define INITTABLES(s) { \ + float factor = (float)slcpu/slots##s; \ + int ax, bx, rx, ac, bc; \ + int i, n; \ + \ + /* calculate internal VDP slot numbers */ \ + for (i = 0; i < cycsz; i++) { \ + n = hint##s + i*clkdiv/factor; \ + if (n >= gapstart##s) n += gapend##s-gapstart##s; \ + hcounts_##s[i] = n % 256; \ + } \ + \ + ax = bx = ac = bc = rx = 0; \ + for (i = 0; i < cycsz; i++) { \ + n = hcounts_##s[i]; \ + if (i == 0 || n != hcounts_##s[i-1]) { \ + /* fill slt <=> cycle tables, active scanline */ \ + if (ax < ARRAY_SIZE(dmaslots##s) && dmaslots##s[ax] == n) { \ + vdpsl2cyc_##s##_ac[++ax]=i; \ + while (ac < i) vdpcyc2sl_##s##_ac[ac++] = ax-1; \ + } \ + /* fill slt <=> cycle tables, scanline off */ \ + if (rx >= ARRAY_SIZE(refslots##s) || refslots##s[rx] != n) { \ + vdpsl2cyc_##s##_bl[++bx]=i; \ + while (bc < i) vdpcyc2sl_##s##_bl[bc++] = bx-1; \ + } else \ + ++rx; \ + } \ + } \ + /* fill up cycle to slot mappings for last slot */ \ + while (ac < cycsz) \ + vdpcyc2sl_##s##_ac[ac] = ARRAY_SIZE(dmaslots##s), ac++; \ + while (bc < cycsz) \ + vdpcyc2sl_##s##_bl[bc] = slots##s-ARRAY_SIZE(refslots##s), bc++; \ + \ + /* extend tables for 2nd scanline */ \ + memcpy(hcounts_##s+cycsz, hcounts_##s, ARRAY_SIZE(hcounts_##s)-cycsz);\ + i = ARRAY_SIZE(dmaslots##s); 
\ + while (ac < ARRAY_SIZE(vdpcyc2sl_##s##_ac)) \ + vdpcyc2sl_##s##_ac[ac] = vdpcyc2sl_##s##_ac[ac-cycsz]+i, ac++; \ + while (ax < ARRAY_SIZE(vdpsl2cyc_##s##_ac)-1) ax++, \ + vdpsl2cyc_##s##_ac[ax] = vdpsl2cyc_##s##_ac[ax-i]+cycsz; \ + i = slots##s - ARRAY_SIZE(refslots##s); \ + while (bc < ARRAY_SIZE(vdpcyc2sl_##s##_bl)) \ + vdpcyc2sl_##s##_bl[bc] = vdpcyc2sl_##s##_bl[bc-cycsz]+i, bc++; \ + while (bx < ARRAY_SIZE(vdpsl2cyc_##s##_bl)-1) bx++, \ + vdpsl2cyc_##s##_bl[bx] = vdpsl2cyc_##s##_bl[bx-i]+cycsz; \ +} + + +// initialize VDP timing tables +void PicoVideoInit(void) +{ + INITTABLES(32); + INITTABLES(40); +} + + +static int linedisabled; // display disabled on this line +static int lineenabled; // display enabled on this line +static int lineoffset; // offset at which dis/enable took place + +u32 SATaddr, SATmask; // VRAM addr of sprite attribute table + +int (*PicoDmaHook)(u32 source, int len, unsigned short **base, u32 *mask) = NULL; + + +/* VDP FIFO implementation + * + * fifo_slot: last slot executed in this scanline + * fifo_total: #total FIFO entries pending + * fifo_data: last values transferred through fifo + * fifo_queue: fifo transfer queue (#writes, flags) + * + * FIFO states: empty total=0 + * inuse total>0 && total<4 + * full total==4 + * wait total>4 + * Conditions: + * fifo_slot is normally behind slot2cyc[cycles]. Advancing it beyond cycles + * implies blocking the 68k up to that slot. + * + * A FIFO write goes to the end of the FIFO queue, but DMA running in background + * is always the last queue entry (transfers by CPU intervene and come 1st). + * There can be more pending writes than FIFO slots, but the CPU will be blocked + * until FIFO level (without background DMA) <= 4. + * This is only about correct timing, data xfer must be handled by the caller. + * Blocking the CPU means burning cycles via SekCyclesBurn*(), which is to be + * executed by the caller. 
+ * + * FIFOSync "executes" FIFO write slots up to the given cycle in the current + * scanline. A queue entry completely executed is removed from the queue. + * FIFOWrite pushes writes to the transfer queue. If it's a blocking write, 68k + * is blocked if more than 4 FIFO writes are pending. + * FIFORead executes a 68k read. 68k is blocked until the next transfer slot. + */ + +// NB code assumes fifo_* arrays have size 2^n +static struct VdpFIFO { // XXX this must go into save file! + // last transferred FIFO data, ...x = index XXX currently only CPU + u16 fifo_data[4], fifo_dx; + + // queued FIFO transfers, ...x = index, ...l = queue length + // each entry has 2 values: [n]>>3 = #writes, [n]&7 = flags (FQ_*) + u32 fifo_queue[8], fifo_qx, fifo_ql; + int fifo_total; // total# of pending FIFO entries (w/o BGDMA) + + unsigned short fifo_slot; // last executed slot in current scanline + unsigned short fifo_maxslot;// #slots in scanline + + const unsigned short *fifo_cyc2sl; + const unsigned short *fifo_sl2cyc; + const unsigned char *fifo_hcounts; +} VdpFIFO; + +enum { FQ_BYTE = 1, FQ_BGDMA = 2, FQ_FGDMA = 4 }; // queue flags, NB: BYTE = 1! + + +// NB should limit cyc2sl to table size in case 68k overdraws its aim. That can +// happen if the last op is a blocking acess to VDP, or for exceptions (e.g.irq) +#define Cyc2Sl(vf,lc) ((vf)->fifo_cyc2sl[(lc)/clkdiv]) +#define Sl2Cyc(vf,sl) ((vf)->fifo_sl2cyc[sl]*clkdiv) + +// do the FIFO math +static int AdvanceFIFOEntry(struct VdpFIFO *vf, struct PicoVideo *pv, int slots) +{ + u32 *qx = &vf->fifo_queue[vf->fifo_qx]; + int l = slots, b = *qx & FQ_BYTE; + int cnt = *qx >> 3; + + // advance currently active FIFO entry + if (l > cnt) + l = cnt; + if (!(*qx & FQ_BGDMA)) + vf->fifo_total -= ((cnt & b) + l) >> b; + *qx -= l << 3; + + // if entry has been processed... 
+ if (cnt == l) { + // remove entry from FIFO + *qx = 0; + vf->fifo_qx = (vf->fifo_qx+1) & 7; + vf->fifo_ql --; + } + + return l; +} + +static void SetFIFOState(struct VdpFIFO *vf, struct PicoVideo *pv) +{ + u32 st = pv->status, cmd = pv->command; + // release CPU and terminate DMA if FIFO isn't blocking the 68k anymore + if (vf->fifo_total <= 4) { + st &= ~PVS_CPUWR; + if (!(st & (PVS_DMABG|PVS_DMAFILL))) { + st &= ~SR_DMA; + cmd &= ~0x80; + } + } + if (vf->fifo_ql == 0) { + st &= ~PVS_CPURD; + // terminate DMA if applicable + if (!(st & PVS_DMAFILL)) { + st &= ~(SR_DMA|PVS_DMABG); + cmd &= ~0x80; + } + } + pv->status = st; + pv->command = cmd; +} + +// sync FIFO to cycles +void PicoVideoFIFOSync(int cycles) +{ + struct VdpFIFO *vf = &VdpFIFO; + struct PicoVideo *pv = &Pico.video; + int slots, done; + + // calculate #slots since last executed slot + slots = Cyc2Sl(vf, cycles) - vf->fifo_slot; + if (slots <= 0 || !vf->fifo_ql) return; + + // advance FIFO queue by #done slots + done = slots; + while (done > 0 && vf->fifo_ql) { + int l = AdvanceFIFOEntry(vf, pv, done); + vf->fifo_slot += l; + done -= l; + } + + if (done != slots) + SetFIFOState(vf, pv); +} + +// drain FIFO, blocking 68k on the way. FIFO must be synced prior to drain. +static int PicoVideoFIFODrain(int level, int cycles, int bgdma) +{ + struct VdpFIFO *vf = &VdpFIFO; + struct PicoVideo *pv = &Pico.video; + unsigned ocyc = cycles; + int bd = vf->fifo_queue[vf->fifo_qx] & bgdma; + int burn = 0; + + if (!(vf->fifo_ql && ((vf->fifo_total > level) | bd))) return 0; + + // process FIFO entries until low level is reached + while (vf->fifo_slot < vf->fifo_maxslot && + vf->fifo_ql && ((vf->fifo_total > level) | bd)) { + int b = vf->fifo_queue[vf->fifo_qx] & FQ_BYTE; + int c = vf->fifo_queue[vf->fifo_qx] >> 3; + int cnt = bd ? 
c : ((vf->fifo_total-level)<fifo_slot; + + if (slot > vf->fifo_maxslot) { + // target slot in later scanline, advance to eol + slot = vf->fifo_maxslot; + } + if (slot > vf->fifo_slot) { + // advance FIFO to target slot and CPU to cycles at that slot + vf->fifo_slot += AdvanceFIFOEntry(vf, pv, slot - vf->fifo_slot); + cycles = Sl2Cyc(vf, vf->fifo_slot); + bd = vf->fifo_queue[vf->fifo_qx] & bgdma; + } + } + if (vf->fifo_ql && ((vf->fifo_total > level) | bd)) + cycles = slcpu; // not completed in this scanline + if (cycles > ocyc) + burn = cycles - ocyc; + + SetFIFOState(vf, pv); + + return burn; +} + +// read VDP data port +static int PicoVideoFIFORead(void) +{ + struct VdpFIFO *vf = &VdpFIFO; + struct PicoVideo *pv = &Pico.video; + int lc = SekCyclesDone()-Pico.t.m68c_line_start; + int burn = 0; + + if (vf->fifo_ql) { + // advance FIFO and CPU until FIFO is empty + burn = PicoVideoFIFODrain(0, lc, FQ_BGDMA); + lc += burn; + } + + if (vf->fifo_ql) + pv->status |= PVS_CPURD; // target slot is in later scanline + else { + // use next VDP access slot for reading, block 68k until then + vf->fifo_slot = Cyc2Sl(vf, lc) + 1; + burn += Sl2Cyc(vf, vf->fifo_slot) - lc; + } + + return burn; +} + +// write VDP data port +int PicoVideoFIFOWrite(int count, int flags, unsigned sr_mask,unsigned sr_flags) +{ + struct VdpFIFO *vf = &VdpFIFO; + struct PicoVideo *pv = &Pico.video; + int lc = SekCyclesDone()-Pico.t.m68c_line_start; + int burn = 0, x; + + // sync only needed if queue is too full or background dma might be deferred + if ((vf->fifo_ql >= 6) | (pv->status & PVS_DMABG)) + PicoVideoFIFOSync(lc); + + // determine last ent, ignoring bg dma (pushed back below if new ent created) + x = (vf->fifo_qx + vf->fifo_ql - 1 - !!(pv->status & PVS_DMABG)) & 7; + + pv->status = (pv->status & ~sr_mask) | sr_flags; + vf->fifo_total += count * !(flags & FQ_BGDMA); + if (!vf->fifo_ql) + vf->fifo_slot = Cyc2Sl(vf, lc+7); // FIFO latency ~3 vdp slots + + // determine queue position for entry + 
count <<= (flags & FQ_BYTE)+3; + if (vf->fifo_queue[x] && (vf->fifo_queue[x] & 7) == flags) { + // amalgamate entries if of same type and not empty (in case of bgdma) + vf->fifo_queue[x] += count; + } else { + // create new xfer queue entry + vf->fifo_ql ++; + x = (x+1) & 7; + vf->fifo_queue[(x+1)&7] = vf->fifo_queue[x]; // push back bg dma if exists + vf->fifo_queue[x] = count | flags; + } + + // if CPU is waiting for the bus, advance CPU and FIFO until bus is free + // do this only if it would exhaust the available slots since last sync + x = (Cyc2Sl(vf,lc) - vf->fifo_slot) / 2; // lower bound of FIFO ents + if ((pv->status & PVS_CPUWR) && vf->fifo_total > 4 + x) + burn = PicoVideoFIFODrain(4, lc, 0); + + return burn; +} + +// at HINT, advance FIFO to new scanline +int PicoVideoFIFOHint(void) +{ + struct VdpFIFO *vf = &VdpFIFO; + struct PicoVideo *pv = &Pico.video; + int lc = SekCyclesDone()-Pico.t.m68c_line_start; + int burn = 0; + + // reset slot to start of scanline + vf->fifo_slot = 0; + // only need to refresh sprite position if we are synced + if (Pico.est.DrawScanline == Pico.m.scanline && !(pv->status & SR_VB)) + PicoDrawRefreshSprites(); + + // if CPU is waiting for the bus, advance CPU and FIFO until bus is free + if (pv->status & PVS_CPUWR) + burn = PicoVideoFIFODrain(4, lc, 0); + else if (pv->status & PVS_CPURD) + burn = PicoVideoFIFORead(); + + return burn; +} + +// switch FIFO mode between active/inactive display +void PicoVideoFIFOMode(int active, int h40) +{ + static const unsigned short *vdpcyc2sl[2][2] = + { {vdpcyc2sl_32_bl, vdpcyc2sl_40_bl},{vdpcyc2sl_32_ac, vdpcyc2sl_40_ac} }; + static const unsigned short *vdpsl2cyc[2][2] = + { {vdpsl2cyc_32_bl, vdpsl2cyc_40_bl},{vdpsl2cyc_32_ac, vdpsl2cyc_40_ac} }; + static const unsigned char *vdphcounts[2] = + { hcounts_32, hcounts_40 }; + + struct VdpFIFO *vf = &VdpFIFO; + struct PicoVideo *pv = &Pico.video; + int lc = SekCyclesDone() - Pico.t.m68c_line_start; + active = active && !(pv->status & 
PVS_VB2); + + if (vf->fifo_maxslot) + PicoVideoFIFOSync(lc); + else + lc = 0; + + vf->fifo_cyc2sl = vdpcyc2sl[active][h40]; + vf->fifo_sl2cyc = vdpsl2cyc[active][h40]; + vf->fifo_hcounts = vdphcounts[h40]; + // recalculate FIFO slot for new mode + vf->fifo_slot = Cyc2Sl(vf, lc); + vf->fifo_maxslot = Cyc2Sl(vf, slcpu); +} + +// VDP memory rd/wr static __inline void AutoIncrement(void) { - Pico.video.addr=(unsigned short)(Pico.video.addr+Pico.video.reg[0xf]); + struct PicoVideo *pvid = &Pico.video; + pvid->addr=(unsigned short)(pvid->addr+pvid->reg[0xf]); + if (pvid->addr < pvid->reg[0xf]) pvid->addr_u ^= 1; +} + +static NOINLINE void VideoWriteVRAM128(u32 a, u16 d) +{ + // nasty + u32 b = ((a & 2) >> 1) | ((a & 0x400) >> 9) | (a & 0x3FC) | ((a & 0x1F800) >> 1); + + ((u8 *)PicoMem.vram)[b] = d; + if (!(u16)((b^SATaddr) & SATmask)) + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + + if (((a^SATaddr) & SATmask) == 0) + UpdateSAT(a, d); } static void VideoWrite(u16 d) { - unsigned int a=Pico.video.addr; + struct PicoVideo *pvid = &Pico.video; + unsigned int a = pvid->addr; - switch (Pico.video.type) + switch (pvid->type) { - case 1: if(a&1) d=(u16)((d<<8)|(d>>8)); // If address is odd, bytes are swapped (which game needs this?) 
- Pico.vram [(a>>1)&0x7fff]=d; - if (a - ((unsigned)(Pico.video.reg[5]&0x7f) << 9) < 0x400) - rendstatus |= PDRAW_DIRTY_SPRITES; + case 1: if (a & 1) + d = (u16)((d << 8) | (d >> 8)); + a |= pvid->addr_u << 16; + VideoWriteVRAM(a, d); break; - case 3: Pico.m.dirtyPal = 1; - Pico.cram [(a>>1)&0x003f]=d; break; // wraps (Desert Strike) - case 5: Pico.vsram[(a>>1)&0x003f]=d; break; - //default:elprintf(EL_ANOMALY, "VDP write %04x with bad type %i", d, Pico.video.type); break; + case 3: if (PicoMem.cram [(a >> 1) & 0x3f] != (d & 0xeee)) Pico.m.dirtyPal = 1; + PicoMem.cram [(a >> 1) & 0x3f] = d & 0xeee; break; + case 5: PicoMem.vsram[(a >> 1) & 0x3f] = d & 0x7ff; break; + case 0x81: + a |= pvid->addr_u << 16; + VideoWriteVRAM128(a, d); + break; + //default:elprintf(EL_ANOMALY, "VDP write %04x with bad type %i", d, pvid->type); break; } AutoIncrement(); } -static unsigned int VideoRead(void) +static unsigned int VideoRead(int is_from_z80) { - unsigned int a=0,d=0; + struct PicoVideo *pvid = &Pico.video; + unsigned int a, d = VdpFIFO.fifo_data[(VdpFIFO.fifo_dx+1)&3]; - a=Pico.video.addr; a>>=1; + a=pvid->addr; a>>=1; - switch (Pico.video.type) + if (!is_from_z80) + SekCyclesBurnRun(PicoVideoFIFORead()); + switch (pvid->type) { - case 0: d=Pico.vram [a&0x7fff]; break; - case 8: d=Pico.cram [a&0x003f]; break; - case 4: d=Pico.vsram[a&0x003f]; break; - default:elprintf(EL_ANOMALY, "VDP read with bad type %i", Pico.video.type); break; + case 0: d=PicoMem.vram [a & 0x7fff]; break; + case 8: d=PicoMem.cram [a & 0x003f] | (d & ~0x0eee); break; + case 4: if ((a & 0x3f) >= 0x28) a = 0; + d=PicoMem.vsram [a & 0x003f] | (d & ~0x07ff); break; + case 12:a=PicoMem.vram [a & 0x7fff]; if (pvid->addr&1) a >>= 8; + d=(a & 0x00ff) | (d & ~0x00ff); break; + default:elprintf(EL_ANOMALY, "VDP read with bad type %i", pvid->type); break; } AutoIncrement(); return d; } +// VDP DMA + static int GetDmaLength(void) { struct PicoVideo *pvid=&Pico.video; @@ -72,315 +523,424 @@ static int 
GetDmaLength(void) // 16-bit words to transfer: len =pvid->reg[0x13]; len|=pvid->reg[0x14]<<8; - // Charles MacDonald: - if(!len) len = 0xffff; + len = ((len - 1) & 0xffff) + 1; return len; } -static void DmaSlow(int len) +static void DmaSlow(int len, u32 source) { - u16 *pd=0, *pdend, *r; - unsigned int a=Pico.video.addr, a2, d; - unsigned char inc=Pico.video.reg[0xf]; - unsigned int source; + struct PicoVideo *pvid=&Pico.video; + u32 inc = pvid->reg[0xf]; + u32 a = pvid->addr | (pvid->addr_u << 16), e; + u16 *r, *base = NULL; + u32 mask = 0x1ffff; + int lc = SekCyclesDone()-Pico.t.m68c_line_start; - source =Pico.video.reg[0x15]<<1; - source|=Pico.video.reg[0x16]<<9; - source|=Pico.video.reg[0x17]<<17; - - elprintf(EL_VDPDMA, "DmaSlow[%i] %06x->%04x len %i inc=%i blank %i [%i] @ %06x", - Pico.video.type, source, a, len, inc, (Pico.video.status&8)||!(Pico.video.reg[1]&0x40), + elprintf(EL_VDPDMA, "DmaSlow[%i] %06x->%04x len %i inc=%i blank %i [%u] @ %06x", + pvid->type, source, a, len, inc, (pvid->status&SR_VB)||!(pvid->reg[1]&0x40), SekCyclesDone(), SekPc); - Pico.m.dma_xfers += len; - SekCyclesBurnRun(CheckDMA()); + SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_FGDMA | (pvid->type == 1), + PVS_DMABG, SR_DMA | PVS_CPUWR)); + // short transfers might have been completely conveyed to FIFO, adjust state + if ((pvid->status & SR_DMA) && VdpFIFO.fifo_total <= 4) + SetFIFOState(&VdpFIFO, pvid); - if ((source&0xe00000)==0xe00000) { // Ram - pd=(u16 *)(Pico.ram+(source&0xfffe)); - pdend=(u16 *)(Pico.ram+0x10000); + if ((source & 0xe00000) == 0xe00000) { // Ram + base = (u16 *)PicoMem.ram; + mask = 0xffff; } - else if (PicoAHW & PAHW_MCD) + else if (PicoIn.AHW & PAHW_MCD) { - elprintf(EL_VDPDMA, "DmaSlow CD, r3=%02x", Pico_mcd->s68k_regs[3]); - if(source<0x20000) { // Bios area - pd=(u16 *)(Pico_mcd->bios+(source&~1)); - pdend=(u16 *)(Pico_mcd->bios+0x20000); - } else if ((source&0xfc0000)==0x200000) { // Word Ram - source -= 2; - if (!(Pico_mcd->s68k_regs[3]&4)) { // 2M 
mode - pd=(u16 *)(Pico_mcd->word_ram2M+(source&0x3fffe)); - pdend=(u16 *)(Pico_mcd->word_ram2M+0x40000); + u8 r3 = Pico_mcd->s68k_regs[3]; + elprintf(EL_VDPDMA, "DmaSlow CD, r3=%02x", r3); + if (source < Pico.romsize /*0x20000*/) { // Bios area + base = (u16 *)(Pico.rom + (source & 0xfe0000)); + } else if ((source & 0xfc0000) == pcd_base_address+0x200000) { // Word Ram + if (!(r3 & 4)) { // 2M mode + base = (u16 *)(Pico_mcd->word_ram2M + (source & 0x20000)); } else { - if (source < 0x220000) { // 1M mode - int bank = Pico_mcd->s68k_regs[3]&1; - pd=(u16 *)(Pico_mcd->word_ram1M[bank]+(source&0x1fffe)); - pdend=(u16 *)(Pico_mcd->word_ram1M[bank]+0x20000); + if ((source & 0xfe0000) < pcd_base_address+0x220000) { // 1M mode + int bank = r3 & 1; + base = (u16 *)(Pico_mcd->word_ram1M[bank]); } else { - DmaSlowCell(source, a, len, inc); + DmaSlowCell(source - 2, a, len, inc); return; } } - } else if ((source&0xfe0000)==0x020000) { // Prg Ram - u8 *prg_ram = Pico_mcd->prg_ram_b[Pico_mcd->s68k_regs[3]>>6]; - pd=(u16 *)(prg_ram+(source&0x1fffe)); - pdend=(u16 *)(prg_ram+0x20000); - } else { - elprintf(EL_VDPDMA|EL_ANOMALY, "DmaSlow[%i] %06x->%04x: FIXME: unsupported src", Pico.video.type, source, a); - return; + source -= 2; + } else if ((source & 0xfe0000) == pcd_base_address+0x020000) { // Prg Ram + base = (u16 *)Pico_mcd->prg_ram_b[r3 >> 6]; + source -= 2; // XXX: test } } else { // if we have DmaHook, let it handle ROM because of possible DMA delay - if (PicoDmaHook && PicoDmaHook(source, len, &pd, &pdend)); - else if (source%04x: invalid src", Pico.video.type, source, a); - return; - } + u32 source2; + if (PicoDmaHook && (source2 = PicoDmaHook(source, len, &base, &mask))) + source = source2; + else // Rom + base = m68k_dma_source(source); + } + if (!base) { + elprintf(EL_VDPDMA|EL_ANOMALY, "DmaSlow[%i] %06x->%04x: invalid src", pvid->type, source, a); + return; } - // overflow protection, might break something.. 
- if (len > pdend - pd) { - len = pdend - pd; - elprintf(EL_VDPDMA|EL_ANOMALY, "DmaSlow overflow"); - } + // operate in words + source >>= 1; + mask >>= 1; - switch (Pico.video.type) + switch (pvid->type) { case 1: // vram - r = Pico.vram; - if (inc == 2 && !(a&1) && a+len*2 < 0x10000) + e = a + len*2-1; + r = PicoMem.vram; + if (inc == 2 && !(a & 1) && !((a ^ e) >> 16) && + ((a >= SATaddr + 0x280) | (e < SATaddr)) && + !((source ^ (source + len-1)) & ~mask)) { // most used DMA mode - memcpy16(r + (a>>1), pd, len); - a += len*2; + memcpy((char *)r + a, base + (source & mask), len * 2); + a += len * 2; + break; } - else + for(; len; len--) { - for(; len; len--) - { - d=*pd++; - if(a&1) d=(d<<8)|(d>>8); - r[a>>1] = (u16)d; // will drop the upper bits - // AutoIncrement - a=(u16)(a+inc); - // didn't src overlap? - //if(pd >= pdend) pd-=0x8000; // should be good for RAM, bad for ROM - } + u16 d = base[source++ & mask]; + if(a & 1) d=(d<<8)|(d>>8); + VideoWriteVRAM(a, d); + // AutoIncrement + a = (a+inc) & ~0x20000; } - rendstatus |= PDRAW_DIRTY_SPRITES; break; case 3: // cram Pico.m.dirtyPal = 1; - r = Pico.cram; - for(a2=a&0x7f; len; len--) - { - r[a2>>1] = (u16)*pd++; // bit 0 is ignored - // AutoIncrement - a2+=inc; - // didn't src overlap? - //if(pd >= pdend) pd-=0x8000; - // good dest? 
- if(a2 >= 0x80) break; // Todds Adventures in Slime World / Andre Agassi tennis + r = PicoMem.cram; + if (inc == 0 && !(pvid->reg[1] & 0x40) && + (pvid->reg[7] & 0x3f) == ((a/2) & 0x3f)) { // bg color DMA + PicoVideoSync(1); + int sl = VdpFIFO.fifo_hcounts[lc/clkdiv]; + if (sl > VdpFIFO.fifo_hcounts[0]-5) // hint delay is 5 slots + sl = (s8)sl; + // TODO this is needed to cover timing inaccuracies + if (sl <= 12) sl = -3; + else if (sl <= 40) sl = 30; + PicoDrawBgcDMA(base, source, mask, len, sl); + // do last DMA cycle since it's all going to the same cram location + source = source+len-1; + len = 1; + } + for (; len; len--) + { + r[(a / 2) & 0x3f] = base[source++ & mask] & 0xeee; + // AutoIncrement + a = (a+inc) & ~0x20000; } - a=(a&0xff00)|a2; break; - case 5: // vsram[a&0x003f]=d; - r = Pico.vsram; - for(a2=a&0x7f; len; len--) + case 5: // vsram + r = PicoMem.vsram; + for (; len; len--) { - r[a2>>1] = (u16)*pd++; + r[(a / 2) & 0x3f] = base[source++ & mask] & 0x7ff; // AutoIncrement - a2+=inc; - // didn't src overlap? - //if(pd >= pdend) pd-=0x8000; - // good dest? 
- if(a2 >= 0x80) break; + a = (a+inc) & ~0x20000; + } + break; + + case 0x81: // vram 128k + for(; len; len--) + { + u16 d = base[source++ & mask]; + VideoWriteVRAM128(a, d); + // AutoIncrement + a = (a+inc) & ~0x20000; } - a=(a&0xff00)|a2; break; default: - if (Pico.video.type != 0 || (EL_LOGMASK & EL_VDPDMA)) - elprintf(EL_VDPDMA|EL_ANOMALY, "DMA with bad type %i", Pico.video.type); + if (pvid->type != 0 || (EL_LOGMASK & EL_VDPDMA)) + elprintf(EL_VDPDMA|EL_ANOMALY, "DMA with bad type %i", pvid->type); break; } // remember addr - Pico.video.addr=(u16)a; + pvid->addr = a; + pvid->addr_u = a >> 16; } static void DmaCopy(int len) { - u16 a=Pico.video.addr; - unsigned char *vr = (unsigned char *) Pico.vram; - unsigned char *vrs; - unsigned char inc=Pico.video.reg[0xf]; + struct PicoVideo *pvid=&Pico.video; + u32 a = pvid->addr | (pvid->addr_u << 16); + u8 *vr = (u8 *)PicoMem.vram; + u8 inc = pvid->reg[0xf]; int source; - elprintf(EL_VDPDMA, "DmaCopy len %i [%i]", len, SekCyclesDone()); + elprintf(EL_VDPDMA, "DmaCopy len %i [%u]", len, SekCyclesDone()); - Pico.m.dma_xfers += len; - Pico.video.status |= 2; // dma busy + // XXX implement VRAM 128k? Is this even working? xfer/count still in bytes? + SekCyclesBurnRun(PicoVideoFIFOWrite(2*len, FQ_BGDMA, // 2 slots each (rd+wr) + PVS_CPUWR, SR_DMA | PVS_DMABG)); - source =Pico.video.reg[0x15]; - source|=Pico.video.reg[0x16]<<8; - vrs=vr+source; - - if (source+len > 0x10000) len=0x10000-source; // clip?? 
+ source =pvid->reg[0x15]; + source|=pvid->reg[0x16]<<8; for (; len; len--) { - vr[a] = *vrs++; + vr[(u16)a] = vr[(u16)(source++)]; + if (((a^SATaddr) & SATmask) == 0) + UpdateSAT(a, ((u16 *)vr)[(u16)a >> 1]); // AutoIncrement - a=(u16)(a+inc); + a = (a+inc) & ~0x20000; } // remember addr - Pico.video.addr=a; - rendstatus |= PDRAW_DIRTY_SPRITES; + pvid->addr = a; + pvid->addr_u = a >> 16; } -// check: Contra, Megaman -// note: this is still inaccurate -static void DmaFill(int data) -{ - int len; - unsigned short a=Pico.video.addr; - unsigned char *vr=(unsigned char *) Pico.vram; - unsigned char high = (unsigned char) (data >> 8); - unsigned char inc=Pico.video.reg[0xf]; - - len=GetDmaLength(); - elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%i]", len, inc, SekCyclesDone()); - - Pico.m.dma_xfers += len; - Pico.video.status |= 2; // dma busy - - // from Charles MacDonald's genvdp.txt: - // Write lower byte to address specified - vr[a] = (unsigned char) data; - a=(u16)(a+inc); - - if (!inc) len=1; - - for (; len; len--) { - // Write upper byte to adjacent address - // (here we are byteswapped, so address is already 'adjacent') - vr[a] = high; - - // Increment address register - a=(u16)(a+inc); - } - // remember addr - Pico.video.addr=a; - // update length - Pico.video.reg[0x13] = Pico.video.reg[0x14] = 0; // Dino Dini's Soccer (E) (by Haze) - - rendstatus |= PDRAW_DIRTY_SPRITES; -} - -static void CommandDma(void) +static NOINLINE void DmaFill(int data) { struct PicoVideo *pvid=&Pico.video; - int len=0,method=0; + u32 a = pvid->addr | (pvid->addr_u << 16), e; + u8 *vr = (u8 *)PicoMem.vram; + u8 high = (u8)(data >> 8); + u8 inc = pvid->reg[0xf]; + int source; + int len, l; - if ((pvid->reg[1]&0x10)==0) return; // DMA not enabled + len = GetDmaLength(); + elprintf(EL_VDPDMA, "DmaFill len %i inc %i [%u]", len, inc, SekCyclesDone()); - len=GetDmaLength(); + SekCyclesBurnRun(PicoVideoFIFOWrite(len, FQ_BGDMA, // 1 slot each (wr) + PVS_CPUWR | PVS_DMAFILL, SR_DMA | PVS_DMABG)); 
+ + switch (pvid->type) + { + case 1: // vram + e = a + len-1; + if (inc == 1 && !((a ^ e) >> 16) && + ((a >= SATaddr + 0x280) | (e < SATaddr))) + { + // most used DMA mode + memset(vr + (u16)a, high, len); + a += len; + break; + } + for (l = len; l; l--) { + // Write upper byte to adjacent address + // (here we are byteswapped, so address is already 'adjacent') + vr[(u16)a] = high; + if (((a^SATaddr) & SATmask) == 0) + UpdateSAT(a, ((u16 *)vr)[(u16)a >> 1]); + + // Increment address register + a = (a+inc) & ~0x20000; + } + break; + case 3: // cram + Pico.m.dirtyPal = 1; + data &= 0xeee; + for (l = len; l; l--) { + PicoMem.cram[(a/2) & 0x3f] = data; + + // Increment address register + a = (a+inc) & ~0x20000; + } + break; + case 5: { // vsram + data &= 0x7ff; + for (l = len; l; l--) { + PicoMem.vsram[(a/2) & 0x3f] = data; + + // Increment address register + a = (a+inc) & ~0x20000; + } + break; + } + case 0x81: // vram 128k + for (l = len; l; l--) { + VideoWriteVRAM128(a, data); + + // Increment address register + a = (a+inc) & ~0x20000; + } + break; + default: + a += len * inc; + break; + } + + // remember addr + pvid->addr = a; + pvid->addr_u = a >> 16; + // register update + pvid->reg[0x13] = pvid->reg[0x14] = 0; + source = pvid->reg[0x15]; + source |= pvid->reg[0x16] << 8; + source += len; + pvid->reg[0x15] = source; + pvid->reg[0x16] = source >> 8; +} + +// VDP command handling + +static NOINLINE void CommandDma(void) +{ + struct PicoVideo *pvid = &Pico.video; + u32 len, method; + u32 source; + + PicoVideoFIFOSync(SekCyclesDone()-Pico.t.m68c_line_start); + if (pvid->status & SR_DMA) { + elprintf(EL_VDPDMA, "Dma overlap, left=%d @ %06x", + VdpFIFO.fifo_total, SekPc); + VdpFIFO.fifo_total = VdpFIFO.fifo_ql = 0; + pvid->status &= ~PVS_DMAFILL; + } + + len = GetDmaLength(); + source = pvid->reg[0x15]; + source |= pvid->reg[0x16] << 8; + source |= pvid->reg[0x17] << 16; method=pvid->reg[0x17]>>6; - if (method< 2) DmaSlow(len); // 68000 to VDP - if (method==3) 
DmaCopy(len); // VRAM Copy -} - -static void CommandChange(void) -{ - struct PicoVideo *pvid=&Pico.video; - unsigned int cmd=0,addr=0; - - cmd=pvid->command; - - // Get type of transfer 0xc0000030 (v/c/vsram read/write) - pvid->type=(unsigned char)(((cmd>>2)&0xc)|(cmd>>30)); - - // Get address 0x3fff0003 - addr =(cmd>>16)&0x3fff; - addr|=(cmd<<14)&0xc000; - pvid->addr=(unsigned short)addr; - - // Check for dma: - if (cmd&0x80) CommandDma(); -} - -static void DrawSync(int blank_on) -{ - if (Pico.m.scanline < 224 && !(PicoOpt & POPT_ALT_RENDERER) && - !PicoSkipFrame && DrawScanline <= Pico.m.scanline) { - //elprintf(EL_ANOMALY, "sync"); - PicoDrawSync(Pico.m.scanline, blank_on); - } -} - -PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) -{ - struct PicoVideo *pvid=&Pico.video; - - //if (Pico.m.scanline < 224) - // elprintf(EL_STATUS, "PicoVideoWrite [%06x] %04x", a, d); - a&=0x1c; - - if (a==0x00) // Data port 0 or 2 - { - // try avoiding the sync.. - if (Pico.m.scanline < 224 && (pvid->reg[1]&0x40) && - !(!pvid->pending && - ((pvid->command & 0xc00000f0) == 0x40000010 && Pico.vsram[pvid->addr>>1] == d)) - ) - DrawSync(0); - - if (pvid->pending) { - CommandChange(); - pvid->pending=0; - } - - // If a DMA fill has been set up, do it - if ((pvid->command&0x80) && (pvid->reg[1]&0x10) && (pvid->reg[0x17]>>6)==2) - { - DmaFill(d); - } - else - { - // preliminary FIFO emulation for Chaos Engine, The (E) - if (!(pvid->status&8) && (pvid->reg[1]&0x40) && !(PicoOpt&POPT_DIS_VDP_FIFO)) // active display? 
- { - pvid->status&=~0x200; // FIFO no longer empty - pvid->lwrite_cnt++; - if (pvid->lwrite_cnt >= 4) pvid->status|=0x100; // FIFO full - if (pvid->lwrite_cnt > 4) { - SekCyclesBurnRun(32); // penalty // 488/12-8 - } - elprintf(EL_ASVDP, "VDP data write: %04x [%06x] {%i} #%i @ %06x", d, Pico.video.addr, - Pico.video.type, pvid->lwrite_cnt, SekPc); - } - VideoWrite(d); - } + if (method < 2) + DmaSlow(len, source << 1); // 68000 to VDP + else if (method == 3) + DmaCopy(len); // VRAM Copy + else { + pvid->status |= SR_DMA|PVS_DMAFILL; return; } + source += len; + pvid->reg[0x13] = pvid->reg[0x14] = 0; + pvid->reg[0x15] = source; + pvid->reg[0x16] = source >> 8; +} - if (a==0x04) // Control (command) port 4 or 6 +static NOINLINE void CommandChange(struct PicoVideo *pvid) +{ + unsigned int cmd, addr; + + cmd = pvid->command; + + // Get type of transfer 0xc0000030 (v/c/vsram read/write) + pvid->type = (u8)(((cmd >> 2) & 0xc) | (cmd >> 30)); + if (pvid->type == 1) // vram + pvid->type |= pvid->reg[1] & 0x80; // 128k + + // Get address 0x3fff0003 + addr = (cmd >> 16) & 0x3fff; + addr |= (cmd << 14) & 0xc000; + pvid->addr = (u16)addr; + pvid->addr_u = (u8)((cmd >> 2) & 1); +} + +// VDP interface + +static inline int InHblank(int offs) +{ + // check if in left border (14 pixels) or HBLANK (86 pixels), 116 68k cycles + return SekCyclesDone() - Pico.t.m68c_line_start <= offs; +} + +void PicoVideoSync(int skip) +{ + struct VdpFIFO *vf = &VdpFIFO; + int lines = Pico.video.reg[1]&0x08 ? 240 : 224; + int last = Pico.m.scanline - (skip > 0); + + if (!(PicoIn.opt & POPT_ALT_RENDERER) && !PicoIn.skipFrame) { + if (last >= lines) + last = lines-1; + else // in active display, need to sync next frame as well + Pico.est.rendstatus |= PDRAW_SYNC_NEXT; + + //elprintf(EL_ANOMALY, "sync"); + if (unlikely(linedisabled >= 0 && linedisabled <= last)) { + if (Pico.est.DrawScanline <= linedisabled) { + int sl = vf->fifo_hcounts[lineoffset/clkdiv]; + PicoDrawSync(linedisabled, sl ? 
sl : 1, 0); + } + linedisabled = -1; + } + if (unlikely(lineenabled >= 0 && lineenabled <= last)) { + if (Pico.est.DrawScanline <= lineenabled) { + int sl = vf->fifo_hcounts[lineoffset/clkdiv]; + PicoDrawSync(lineenabled, 0, sl ? sl : 1); + } + lineenabled = -1; + } + if (Pico.est.DrawScanline <= last) + PicoDrawSync(last, 0, 0); + } + if (skip >= 0) + Pico.est.rendstatus |= PDRAW_SYNC_NEEDED; +} + +PICO_INTERNAL_ASM void PicoVideoWrite(u32 a,unsigned short d) +{ + struct PicoVideo *pvid=&Pico.video; + + //elprintf(EL_STATUS, "PicoVideoWrite [%06x] %04x [%u] @ %06x", + // a, d, SekCyclesDone(), SekPc); + + a &= 0x1c; + switch (a) { + case 0x00: // Data port 0 or 2 + if (pvid->pending) { + CommandChange(pvid); + pvid->pending=0; + } + + // try avoiding the sync if the data doesn't change. + // Writes to the SAT in VRAM are special since they update the SAT cache. + if ((pvid->reg[1]&0x40) && + !(pvid->type == 1 && !(pvid->addr&1) && ((pvid->addr^SATaddr)&SATmask) && PicoMem.vram[pvid->addr>>1] == d) && + !(pvid->type == 3 && PicoMem.cram[(pvid->addr>>1) & 0x3f] == (d & 0xeee)) && + !(pvid->type == 5 && PicoMem.vsram[(pvid->addr>>1) & 0x3f] == (d & 0x7ff))) + // the vertical scroll value for this line must be read from VSRAM early, + // since the A/B tile row to be read depends on it. E.g. Skitchin, OD2 + // in contrast, CRAM writes would have an immediate effect on the current + // pixel, so sync can be closer to start of actual image data + PicoVideoSync(InHblank(pvid->type == 3 ? 103 : 30)); // cram in Toy Story + + if (!(PicoIn.opt&POPT_DIS_VDP_FIFO)) + { + VdpFIFO.fifo_data[++VdpFIFO.fifo_dx&3] = d; + SekCyclesBurnRun(PicoVideoFIFOWrite(1, pvid->type == 1, 0, PVS_CPUWR)); + + elprintf(EL_ASVDP, "VDP data write: [%04x] %04x [%u] {%i} @ %06x", + pvid->addr, d, SekCyclesDone(), pvid->type, SekPc); + } + VideoWrite(d); + + // start DMA fill on write. NB VSRAM and CRAM fills use wrong FIFO data. 
+ if (pvid->status & PVS_DMAFILL) + DmaFill(VdpFIFO.fifo_data[(VdpFIFO.fifo_dx + !!(pvid->type&~0x81))&3]); + + break; + + case 0x04: // Control (command) port 4 or 6 + if (pvid->status & SR_DMA) + SekCyclesBurnRun(PicoVideoFIFORead()); // kludge, flush out running DMA if (pvid->pending) { - if (d & 0x80) DrawSync(0); // only need sync for DMA // Low word of command: - pvid->command&=0xffff0000; - pvid->command|=d; - pvid->pending=0; - CommandChange(); + if (!(pvid->reg[1]&0x10)) + d = (d&~0x80)|(pvid->command&0x80); + pvid->command &= 0xffff0000; + pvid->command |= d; + pvid->pending = 0; + CommandChange(pvid); + // Check for dma: + if (d & 0x80) { + PicoVideoSync(InHblank(93)); + CommandDma(); + } } else { @@ -389,51 +949,92 @@ PICO_INTERNAL_ASM void PicoVideoWrite(unsigned int a,unsigned short d) // Register write: int num=(d>>8)&0x1f; int dold=pvid->reg[num]; - int blank_on = 0; pvid->type=0; // register writes clear command (else no Sega logo in Golden Axe II) if (num > 0x0a && !(pvid->reg[1]&4)) { elprintf(EL_ANOMALY, "%02x written to reg %02x in SMS mode @ %06x", d, num, SekPc); return; } - if (num == 1 && !(d&0x40) && SekCyclesDone() - line_base_cycles <= 488-390) - blank_on = 1; - DrawSync(blank_on); - pvid->reg[num]=(unsigned char)d; + d &= 0xff; + + if (num == 1 && ((pvid->reg[1]^d)&0x40)) { + // handle line blanking before line rendering. Only the last switch + // before the 1st sync for other reasons is honoured. Switching after + // active area is on next line + int skip = InHblank(470); // Deadly Moves + PicoVideoSync(skip); + lineenabled = (d&0x40) ? Pico.m.scanline + !skip: -1; + linedisabled = (d&0x40) ? -1 : Pico.m.scanline + !skip; + lineoffset = (skip ? 
SekCyclesDone() - Pico.t.m68c_line_start : 0); + } else if (((1<reg[num] != d) + // VDP regs 0-7,11-13,16-18 influence rendering, ignore all others + PicoVideoSync(InHblank(93)); // Toy Story + pvid->reg[num] = d; + switch (num) { case 0x00: - elprintf(EL_INTSW, "hint_onoff: %i->%i [%i] pend=%i @ %06x", (dold&0x10)>>4, + if ((~dold&d)&2) { + unsigned c = SekCyclesDone() - Pico.t.m68c_line_start; + pvid->hv_latch = VdpFIFO.fifo_hcounts[c/clkdiv] | (pvid->v_counter << 8); + } + elprintf(EL_INTSW, "hint_onoff: %i->%i [%u] pend=%i @ %06x", (dold&0x10)>>4, (d&0x10)>>4, SekCyclesDone(), (pvid->pending_ints&0x10)>>4, SekPc); goto update_irq; case 0x01: - elprintf(EL_INTSW, "vint_onoff: %i->%i [%i] pend=%i @ %06x", (dold&0x20)>>5, + if ((d^dold)&0x40) + PicoVideoFIFOMode(d & 0x40, pvid->reg[12]&1); + if (!(pvid->status & PVS_VB2)) + pvid->status &= ~SR_VB; + pvid->status |= ((d >> 3) ^ SR_VB) & SR_VB; // forced blanking + elprintf(EL_INTSW, "vint_onoff: %i->%i [%u] pend=%i @ %06x", (dold&0x20)>>5, (d&0x20)>>5, SekCyclesDone(), (pvid->pending_ints&0x20)>>5, SekPc); goto update_irq; case 0x05: - //elprintf(EL_STATUS, "spritep moved to %04x", (unsigned)(Pico.video.reg[5]&0x7f) << 9); - if (d^dold) rendstatus |= PDRAW_SPRITES_MOVED; + case 0x06: + if (d^dold) Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; break; case 0x0c: // renderers should update their palettes if sh/hi mode is changed - if ((d^dold)&8) Pico.m.dirtyPal = 2; + if ((d^dold)&8) Pico.m.dirtyPal = 1; + if ((d^dold)&1) { + PicoVideoFIFOMode(pvid->reg[1]&0x40, d & 1); + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; + } break; + default: + return; + } + if (Pico.est.rendstatus & PDRAW_DIRTY_SPRITES) { + SATaddr = ((pvid->reg[5]&0x7f) << 9) | ((pvid->reg[6]&0x20) << 11); + SATmask = ~0x1ff; + if (pvid->reg[12]&1) + SATaddr &= ~0x200, SATmask &= ~0x200; // H40, zero lowest SAT bit + //elprintf(EL_STATUS, "spritep moved to %04x", SATaddr); } return; update_irq: #ifndef EMU_CORE_DEBUG - // update IRQ level - if 
(!SekShouldInterrupt()) // hack + // update IRQ level; TODO hack, still fire irq if disabling now + if (!SekShouldInterrupt() || SekIrqLevel < pvid->hint_irq) { - int lines, pints, irq=0; + int lines, pints, irq = 0; lines = (pvid->reg[1] & 0x20) | (pvid->reg[0] & 0x10); - pints = (pvid->pending_ints&lines); + pints = pvid->pending_ints & lines; if (pints & 0x20) irq = 6; - else if (pints & 0x10) irq = 4; - SekInterrupt(irq); // update line + else if (pints & 0x10) irq = pvid->hint_irq; - if (irq) SekEndRun(24); // make it delayed + if (irq) { + // VDP irqs have highest prio, just overwrite old level + SekInterrupt(irq); // update line + + // TODO this is broken because cost of current insn isn't known here + SekEndRun(21); // make it delayed + } else if (SekIrqLevel >= pvid->hint_irq) { + // no VDP irq, query lower irqs + SekInterrupt(PicoIn.AHW & PAHW_PICO ? PicoPicoIrqAck(0) : 0); + } } #endif } @@ -445,105 +1046,231 @@ update_irq: pvid->pending=1; } } + break; + + // case 0x08: // 08 0a - HV counter - lock up + // case 0x0c: // 0c 0e - HV counter - lock up + // case 0x10: // 10 12 - PSG - handled by caller + // case 0x14: // 14 16 - PSG - handled by caller + // case 0x18: // 18 1a - no effect? + case 0x1c: // 1c 1e - debug + pvid->debug = d; + pvid->debug_p = 0; + if (d & (1 << 6)) { + pvid->debug_p |= PVD_KILL_A | PVD_KILL_B; + pvid->debug_p |= PVD_KILL_S_LO | PVD_KILL_S_HI; + } + switch ((d >> 7) & 3) { + case 1: + pvid->debug_p &= ~(PVD_KILL_S_LO | PVD_KILL_S_HI); + pvid->debug_p |= PVD_FORCE_S; + break; + case 2: + pvid->debug_p &= ~PVD_KILL_A; + pvid->debug_p |= PVD_FORCE_A; + break; + case 3: + pvid->debug_p &= ~PVD_KILL_B; + pvid->debug_p |= PVD_FORCE_B; + break; + } + break; } } -PICO_INTERNAL_ASM unsigned int PicoVideoRead(unsigned int a) +static u32 VideoSr(const struct PicoVideo *pv) { - a&=0x1c; + unsigned int hp = pv->reg[12]&1 ? hboff40*488.5/slots40 : hboff32*488.5/slots32; + unsigned int hl = pv->reg[12]&1 ? 
hblen40*488.5/slots40 : hblen32*488.5/slots32; + unsigned int c = SekCyclesDone() - Pico.t.m68c_line_start; + u32 d; - if (a==0x04) // control port + PicoVideoFIFOSync(c); + d = (u16)pv->status; + + if (c - hp < hl) + d |= SR_HB; + + if (VdpFIFO.fifo_total >= 4) + d |= SR_FULL; + else if (!VdpFIFO.fifo_total) + d |= SR_EMPT; + return d; +} + +PICO_INTERNAL_ASM u32 PicoVideoRead(u32 a) +{ + struct PicoVideo *pv = &Pico.video; + a &= 0x1c; + + if (a == 0x04) // control port { - struct PicoVideo *pv=&Pico.video; - unsigned int d; - d=pv->status; - //if (PicoOpt&POPT_ALT_RENDERER) d|=0x0020; // sprite collision (Shadow of the Beast) - if (SekCyclesDone() - line_base_cycles >= 488-88) - d|=0x0004; // H-Blank (Sonic3 vs) - - d |= ((pv->reg[1]&0x40)^0x40) >> 3; // set V-Blank if display is disabled - d |= (pv->pending_ints&0x20)<<2; // V-int pending? - if (d&0x100) pv->status&=~0x100; // FIFO no longer full - - pv->pending = 0; // ctrl port reads clear write-pending flag (Charles MacDonald) - - elprintf(EL_SR, "SR read: %04x @ %06x", d, SekPc); + u32 d = VideoSr(pv); + if (pv->pending) { + CommandChange(pv); + pv->pending = 0; + } + elprintf(EL_SR, "SR read: %04x [%u] @ %06x", d, SekCyclesDone(), SekPc); return d; } - // H-counter info (based on Generator): - // frame: - // | <- hblank? -> | - // start <416> hint <36> hdisplay <38> end // CPU cycles - // |---------...---------|------------|-------------| - // 0 B6 E4 FF // 40 cells - // 0 93 E8 FF // 32 cells - - // Gens (?) 
v-render - // start hint hdisplay <404> | - // |---------------------|--------------------------| - // E4 (hc[0x43]==0) 07 B1 // 40 - // E8 (hc[0x45]==0) 05 91 // 32 - - // check: Sonic 3D Blast bonus, Cannon Fodder, Chase HQ II, 3 Ninjas kick back, Road Rash 3, Skitchin', Wheel of Fortune - if ((a&0x1c)==0x08) + if (a == 0x08) { - unsigned int d; + unsigned int c; + u32 d; - d = (SekCyclesDone() - line_base_cycles) & 0x1ff; // FIXME - if (Pico.video.reg[12]&1) - d = hcounts_40[d]; - else d = hcounts_32[d]; + c = SekCyclesDone() - Pico.t.m68c_line_start; + if (pv->reg[0]&2) + d = pv->hv_latch; + else d = VdpFIFO.fifo_hcounts[c/clkdiv] | (pv->v_counter << 8); - elprintf(EL_HVCNT, "hv: %02x %02x (%i) @ %06x", d, Pico.video.v_counter, SekCyclesDone(), SekPc); - return d | (Pico.video.v_counter << 8); + elprintf(EL_HVCNT, "hv: %02x %02x [%u] @ %06x", d, pv->v_counter, SekCyclesDone(), SekPc); + return d; } if (a==0x00) // data port { - return VideoRead(); + return VideoRead(0); } - return 0; + return PicoRead16_floating(a | 0xc00000); } -unsigned int PicoVideoRead8(unsigned int a) +unsigned char PicoVideoRead8DataH(int is_from_z80) { - unsigned int d; - a&=0x1d; - - switch (a) - { - case 0: return VideoRead() >> 8; - case 1: return VideoRead() & 0xff; - case 4: // control port/status reg - d = Pico.video.status >> 8; - if (d&1) Pico.video.status&=~0x100; // FIFO no longer full - Pico.video.pending = 0; - elprintf(EL_SR, "SR read (h): %02x @ %06x", d, SekPc); - return d; - case 5: - d = Pico.video.status & 0xff; - //if (PicoOpt&POPT_ALT_RENDERER) d|=0x0020; // sprite collision (Shadow of the Beast) - d |= ((Pico.video.reg[1]&0x40)^0x40) >> 3; // set V-Blank if display is disabled - d |= (Pico.video.pending_ints&0x20)<<2; // V-int pending? 
- if (SekCyclesDone() - line_base_cycles >= 488-88) d |= 4; // H-Blank - Pico.video.pending = 0; - elprintf(EL_SR, "SR read (l): %02x @ %06x", d, SekPc); - return d; - case 8: // hv counter - elprintf(EL_HVCNT, "vcounter: %02x (%i) @ %06x", Pico.video.v_counter, SekCyclesDone(), SekPc); - return Pico.video.v_counter; - case 9: - d = (SekCyclesDone() - line_base_cycles) & 0x1ff; // FIXME - if (Pico.video.reg[12]&1) - d = hcounts_40[d]; - else d = hcounts_32[d]; - elprintf(EL_HVCNT, "hcounter: %02x (%i) @ %06x", d, SekCyclesDone(), SekPc); - return d; - } - - return 0; + return VideoRead(is_from_z80) >> 8; } +unsigned char PicoVideoRead8DataL(int is_from_z80) +{ + return VideoRead(is_from_z80); +} + +unsigned char PicoVideoRead8CtlH(int is_from_z80) +{ + struct PicoVideo *pv = &Pico.video; + u8 d = VideoSr(pv) >> 8; + if (pv->pending) { + CommandChange(pv); + pv->pending = 0; + } + elprintf(EL_SR, "SR read (h): %02x @ %06x", d, SekPc); + return d; +} + +unsigned char PicoVideoRead8CtlL(int is_from_z80) +{ + struct PicoVideo *pv = &Pico.video; + u8 d = VideoSr(pv); + if (pv->pending) { + CommandChange(pv); + pv->pending = 0; + } + elprintf(EL_SR, "SR read (l): %02x @ %06x", d, SekPc); + return d; +} + +unsigned char PicoVideoRead8HV_H(int is_from_z80) +{ + u32 d = Pico.video.v_counter; + if (Pico.video.reg[0]&2) + d = Pico.video.hv_latch >> 8; + elprintf(EL_HVCNT, "vcounter: %02x [%u] @ %06x", d, SekCyclesDone(), SekPc); + return d; +} + +// FIXME: broken +unsigned char PicoVideoRead8HV_L(int is_from_z80) +{ + u32 d = SekCyclesDone() - Pico.t.m68c_line_start; + if (Pico.video.reg[0]&2) + d = Pico.video.hv_latch; + else d = VdpFIFO.fifo_hcounts[d/clkdiv]; + elprintf(EL_HVCNT, "hcounter: %02x [%u] @ %06x", d, SekCyclesDone(), SekPc); + return d; +} + +void PicoVideoReset(void) +{ + Pico.video.pending_ints=0; + Pico.video.reg[1] &= ~0x40; // TODO verify display disabled after reset + Pico.video.reg[10] = 0xff; // HINT is turned off after reset + Pico.video.status = 
0x3428 | Pico.m.pal; // 'always set' bits | vblank | collision | pal + + memset(&VdpFIFO, 0, sizeof(VdpFIFO)); + Pico.m.dirtyPal = 1; + + PicoDrawBgcDMA(NULL, 0, 0, 0, 0); + PicoVideoFIFOMode(Pico.video.reg[1]&0x40, Pico.video.reg[12]&1); +} + +void PicoVideoCacheSAT(int load) +{ + struct PicoVideo *pv = &Pico.video; + int l; + + SATaddr = ((pv->reg[5]&0x7f) << 9) | ((pv->reg[6]&0x20) << 11); + SATmask = ~0x1ff; + if (pv->reg[12]&1) + SATaddr &= ~0x200, SATmask &= ~0x200; // H40, zero lowest SAT bit + + // rebuild SAT cache XXX wrong since cache and memory can differ + for (l = 0; load && l < 2*80; l ++) { + u16 addr = SATaddr + l*4; + ((u16 *)VdpSATCache)[l*2 ] = PicoMem.vram[(addr>>1) ]; + ((u16 *)VdpSATCache)[l*2 + 1] = PicoMem.vram[(addr>>1) + 1]; + } + + Pico.est.rendstatus |= PDRAW_DIRTY_SPRITES; +} + +void PicoVideoSave(void) +{ + struct VdpFIFO *vf = &VdpFIFO; + struct PicoVideo *pv = &Pico.video; + int l, x; + + // account for all outstanding xfers XXX kludge, entry attr's not saved + pv->fifo_cnt = pv->fifo_bgcnt = 0; + for (l = vf->fifo_ql, x = vf->fifo_qx + l-1; l > 0; l--, x--) { + int cnt = (vf->fifo_queue[x&7] >> 3); + if (vf->fifo_queue[x&7] & FQ_BGDMA) + pv->fifo_bgcnt += cnt; + else + pv->fifo_cnt += cnt; + } +} + +void PicoVideoLoad(void) +{ + struct VdpFIFO *vf = &VdpFIFO; + struct PicoVideo *pv = &Pico.video; + int b = pv->type == 1; + + // convert former dma_xfers (why was this in PicoMisc anyway?) 
+ if (Pico.m.dma_xfers) { + pv->fifo_cnt = Pico.m.dma_xfers << b; + Pico.m.dma_xfers = 0; + } + + // fake entries in the FIFO if there are outstanding transfers + vf->fifo_ql = vf->fifo_qx = vf->fifo_total = 0; + if (pv->fifo_cnt) { + int wc = pv->fifo_cnt; + vf->fifo_total = (wc+b) >> b; + vf->fifo_queue[vf->fifo_qx + vf->fifo_ql] = (wc << 3) | b | FQ_FGDMA; + vf->fifo_ql ++; + if (vf->fifo_total > 4 && !(pv->status & (PVS_CPUWR|PVS_CPURD))) + pv->status |= PVS_CPUWR; + } + if (pv->fifo_bgcnt) { + int wc = pv->fifo_bgcnt; + if (!vf->fifo_ql) + pv->status |= PVS_DMABG; + vf->fifo_queue[vf->fifo_qx + vf->fifo_ql] = (wc << 3) | FQ_BGDMA; + vf->fifo_ql ++; + } + PicoVideoCacheSAT(1); + vf->fifo_maxslot = 0; +} // vim:shiftwidth=2:ts=2:expandtab diff --git a/pico/z80if.c b/pico/z80if.c index 8db4aa5c..474854dc 100644 --- a/pico/z80if.c +++ b/pico/z80if.c @@ -13,10 +13,27 @@ uptr z80_read_map [0x10000 >> Z80_MEM_SHIFT]; uptr z80_write_map[0x10000 >> Z80_MEM_SHIFT]; -#ifdef _USE_DRZ80 -struct DrZ80 drZ80; +u32 z80_read(u32 a) +{ + uptr v; + a &= 0x00ffff; + v = z80_read_map[a >> Z80_MEM_SHIFT]; + if (map_flag_set(v)) + return ((z80_read_f *)(v << 1))(a); + else + return *(u8 *)((v << 1) + a); +} -static u32 drz80_sp_base; + +#ifdef _USE_DRZ80 +// this causes trouble in some cases, like doukutsu putting sp in bank area +// no perf difference for most, upto 1-2% for some others +//#define FAST_Z80SP + +struct DrZ80 drZ80; +// import flag conversion from DrZ80 +extern u8 DrZ80_ARM[]; +extern u8 DrARM_Z80[]; static void drz80_load_pcsp(u32 pc, u32 sp) { @@ -28,6 +45,8 @@ static void drz80_load_pcsp(u32 pc, u32 sp) drZ80.Z80PC_BASE <<= 1; drZ80.Z80PC = drZ80.Z80PC_BASE + pc; } + drZ80.Z80SP = sp; +#ifdef FAST_Z80SP drZ80.Z80SP_BASE = z80_read_map[sp >> Z80_MEM_SHIFT]; if (drZ80.Z80SP_BASE & (1<<31)) { elprintf(EL_STATUS|EL_ANOMALY, "load_pcsp: bad SP: %04x", sp); @@ -37,6 +56,7 @@ static void drz80_load_pcsp(u32 pc, u32 sp) drZ80.Z80SP_BASE <<= 1; drZ80.Z80SP = 
drZ80.Z80SP_BASE + sp; } +#endif } // called only if internal xmap rebase fails @@ -47,13 +67,21 @@ static unsigned int dz80_rebase_pc(unsigned short pc) return drZ80.Z80PC_BASE; } +static void dz80_noop_irq_ack(void) {} + +#ifdef FAST_Z80SP +static u32 drz80_sp_base; + static unsigned int dz80_rebase_sp(unsigned short sp) { elprintf(EL_STATUS|EL_ANOMALY, "dz80_rebase_sp: fail on %04x", sp); drZ80.Z80SP_BASE = z80_read_map[drz80_sp_base >> Z80_MEM_SHIFT] << 1; return drZ80.Z80SP_BASE + (1 << Z80_MEM_SHIFT) - 0x100; } +#else +#define dz80_rebase_sp NULL #endif +#endif // _USE_DRZ80 void z80_init(void) @@ -78,57 +106,34 @@ void z80_init(void) void z80_reset(void) { + int is_sms = (PicoIn.AHW & (PAHW_SMS|PAHW_SG|PAHW_SC)) == PAHW_SMS; #ifdef _USE_DRZ80 drZ80.Z80I = 0; drZ80.Z80IM = 0; drZ80.Z80IF = 0; drZ80.z80irqvector = 0xff0000; // RST 38h drZ80.Z80PC_BASE = drZ80.Z80PC = z80_read_map[0] << 1; - // others not changed, undefined on cold boot -/* - drZ80.Z80F = (1<<2); // set ZFlag - drZ80.Z80F2 = (1<<2); // set ZFlag - drZ80.Z80IX = 0xFFFF << 16; - drZ80.Z80IY = 0xFFFF << 16; -*/ + // other registers not changed, undefined on cold boot +#ifdef FAST_Z80SP // drZ80 is locked in single bank - drz80_sp_base = (PicoAHW & PAHW_SMS) ? 0xc000 : 0x0000; + drz80_sp_base = (PicoIn.AHW & PAHW_8BIT) ? 0xc000 : 0x0000; drZ80.Z80SP_BASE = z80_read_map[drz80_sp_base >> Z80_MEM_SHIFT] << 1; - if (PicoAHW & PAHW_SMS) - drZ80.Z80SP = drZ80.Z80SP_BASE + 0xdff0; // simulate BIOS +#endif + drZ80.Z80SP = drZ80.Z80SP_BASE + (is_sms ? 
0xdff0 : 0xffff); // simulate BIOS + drZ80.z80_irq_callback = NULL; // use auto-clear + if (PicoIn.AHW & PAHW_8BIT) + drZ80.z80_irq_callback = dz80_noop_irq_ack; // XXX: since we use direct SP pointer, it might make sense to force it to RAM, // but we'll rely on built-in stack protection for now #endif #ifdef _USE_CZ80 Cz80_Reset(&CZ80); - if (PicoAHW & PAHW_SMS) + Cz80_Set_Reg(&CZ80, CZ80_SP, 0xffff); + if (is_sms) Cz80_Set_Reg(&CZ80, CZ80_SP, 0xdff0); #endif } -/* save state stuff */ -static int z80_unpack_legacy(const void *data) -{ -#if defined(_USE_DRZ80) - if (*(int *)data == 0x015A7244) { // "DrZ" v1 save? - u32 pc, sp; - memcpy(&drZ80, data+4, 0x54); - pc = (drZ80.Z80PC - drZ80.Z80PC_BASE) & 0xffff; - sp = (drZ80.Z80SP - drZ80.Z80SP_BASE) & 0xffff; - // update bases - drz80_load_pcsp(pc, sp); - return 0; - } -#elif defined(_USE_CZ80) - if (*(int *)data == 0x00007a43) { // "Cz" save? - memcpy(&CZ80, data+8, offsetof(cz80_struc, BasePC)); - Cz80_Set_Reg(&CZ80, CZ80_PC, *(int *)(data+4)); - return 0; - } -#endif - return -1; -} - struct z80sr_main { u8 a, f; u8 b, c; @@ -151,24 +156,26 @@ struct z80_state { u8 im; // irq mode u8 irq_pending; // irq line level, 1 if active u8 irq_vector[3]; // up to 3 byte vector for irq mode0 handling - u8 reserved[8]; + u16 cyc; + u8 reserved[6]; }; void z80_pack(void *data) { struct z80_state *s = data; memset(data, 0, Z80_STATE_SIZE); - strcpy(s->magic, "Z80"); + memcpy(s->magic, "Z80a", 4); + s->cyc = Pico.t.z80c_cnt + ((Pico.t.z80_busdelay + (1<<8)/2) >> 8); #if defined(_USE_DRZ80) #define DRR8(n) (drZ80.Z80##n >> 24) #define DRR16(n) (drZ80.Z80##n >> 16) #define DRR16H(n) (drZ80.Z80##n >> 24) #define DRR16L(n) ((drZ80.Z80##n >> 16) & 0xff) - s->m.a = DRR8(A); s->m.f = drZ80.Z80F; + s->m.a = DRR8(A); s->m.f = DrARM_Z80[drZ80.Z80F]; s->m.b = DRR16H(BC); s->m.c = DRR16L(BC); s->m.d = DRR16H(DE); s->m.e = DRR16L(DE); s->m.h = DRR16H(HL); s->m.l = DRR16L(HL); - s->a.a = DRR8(A2); s->a.f = drZ80.Z80F2; + s->a.a = DRR8(A2); 
s->a.f = DrARM_Z80[drZ80.Z80F2]; s->a.b = DRR16H(BC2); s->a.c = DRR16L(BC2); s->a.d = DRR16H(DE2); s->a.e = DRR16L(DE2); s->a.h = DRR16H(HL2); s->a.l = DRR16L(HL2); @@ -203,7 +210,7 @@ void z80_pack(void *data) s->iff1 = !!zIFF1; s->iff2 = !!zIFF2; s->im = zIM; - s->irq_pending = (Cz80_Get_Reg(&CZ80, CZ80_IRQ) == HOLD_LINE); + s->irq_pending = (Cz80_Get_Reg(&CZ80, CZ80_IRQ) != CLEAR_LINE); s->irq_vector[0] = 0xff; } #endif @@ -212,22 +219,32 @@ void z80_pack(void *data) int z80_unpack(const void *data) { const struct z80_state *s = data; - if (strcmp(s->magic, "Z80") != 0) { - if (z80_unpack_legacy(data) != 0) - goto fail; - elprintf(EL_STATUS, "legacy z80 state"); + if (memcmp(s->magic, "Z80", 3) != 0) { + elprintf(EL_STATUS, "legacy z80 state - ignored"); return 0; } + Pico.t.z80c_cnt = s->cyc; + Pico.t.z80_busdelay = 0; #if defined(_USE_DRZ80) #define DRW8(n, v) drZ80.Z80##n = (u32)(v) << 24 #define DRW16(n, v) drZ80.Z80##n = (u32)(v) << 16 #define DRW16HL(n, h, l) drZ80.Z80##n = ((u32)(h) << 24) | ((u32)(l) << 16) - DRW8(A, s->m.a); drZ80.Z80F = s->m.f; + u8 mf, af; + if (s->magic[3] == 'a') { + // new save: flags always in Z80 format + mf = DrZ80_ARM[s->m.f]; + af = DrZ80_ARM[s->a.f]; + } else { + // NB hack, swap Flag3 and NFlag for save file compatibility + mf = (s->m.f & 0x9f)|((s->m.f & 0x40)>>1)|((s->m.f & 0x20)<<1); + af = (s->a.f & 0x9f)|((s->a.f & 0x40)>>1)|((s->a.f & 0x20)<<1); + } + DRW8(A, s->m.a); drZ80.Z80F = mf; DRW16HL(BC, s->m.b, s->m.c); DRW16HL(DE, s->m.d, s->m.e); DRW16HL(HL, s->m.h, s->m.l); - DRW8(A2, s->a.a); drZ80.Z80F2 = s->a.f; + DRW8(A2, s->a.a); drZ80.Z80F2 = af; DRW16HL(BC2, s->a.b, s->a.c); DRW16HL(DE2, s->a.d, s->a.e); DRW16HL(HL2, s->a.h, s->a.l); @@ -262,16 +279,13 @@ int z80_unpack(const void *data) Cz80_Set_Reg(&CZ80, CZ80_IFF1, s->iff1); Cz80_Set_Reg(&CZ80, CZ80_IFF2, s->iff2); zIM = s->im; - Cz80_Set_Reg(&CZ80, CZ80_IRQ, s->irq_pending ? HOLD_LINE : CLEAR_LINE); + Cz80_Set_Reg(&CZ80, CZ80_IRQ, s->irq_pending ? 
ASSERT_LINE : CLEAR_LINE); + Cz80_Set_IRQ(&CZ80, 0, Cz80_Get_Reg(&CZ80, CZ80_IRQ)); return 0; } +#else + return 0; #endif - -fail: - elprintf(EL_STATUS|EL_ANOMALY, "z80_unpack failed"); - z80_reset(); - z80_int(); - return -1; } void z80_exit(void) @@ -286,3 +300,5 @@ void z80_debug(char *dstr) sprintf(dstr, "Z80 state: PC: %04x SP: %04x\n", (unsigned int)(CZ80.PC - CZ80.BasePC), CZ80.SP.W); #endif } + +// vim:ts=2:sw=2:expandtab diff --git a/platform/base_readme.txt b/platform/base_readme.txt index 60e91f5e..df2c8274 100644 --- a/platform/base_readme.txt +++ b/platform/base_readme.txt @@ -1,1083 +1 @@ -# -PicoDrive 1.xx - -About ------ - -This is yet another Megadrive / Genesis / Sega CD / Mega CD / 32X / SMS -emulator, which was written having ARM-based handheld devices in mind -(such as smartphones and handheld consoles like GP2X and Pandora). - -The emulator is heavily optimized for ARM, features assembly cores for -68k, Z80 and VDP chip emulation, also has dynamic recompilers for SH2 and -SSP16 (for 32X and SVP emulation). It was started by Dave (aka fdave, -finalburn author) as basic Genesis/Megadrive emulator for Pocket PC, -then taken over and expanded by notaz. - -PicoDrive is the first emulator ever to properly emulate Virtua Racing and -it's SVP chip. - - -How to make it run ------------------- - -#ifdef GP2X -Extract all files to some directory on your SD and run PicoDrive.gpe from your -GP2X/Wiz/Caanoo menu. The same .gpe supports GP2X F100/F200, Wiz and Caanoo, -there is no need to use separate versions. -Then load a ROM and enjoy! ROMs can be in .smd or .bin format and can be zipped. -Sega/Mega CD images can be in ISO/CSO+MP3/WAV or CUE+BIN formats (read below -for more details). -#endif -#ifdef GIZ -First make sure you have homebrew-enabled Service Pack installed. 
Then copy -PicoDrive.exe and KGSDK.dll to any place in your filesystem (both files must -be in the same directory) and run PicoDrive.exe using the launcher of your choice -(some of them might require renaming PicoDrive.exe to Autorun.exe, placing it in -the root of SD, etc). Then load a ROM and enjoy! ROMs can be placed anywhere, can -be in .smd or .bin format and can be zipped (one ROM per zip). -#endif -#ifdef PSP -If you are running a custom firmware, just copy the whole PicoDrive directory to -/PSP/GAME or /PSP/GAMEXXX directory in your memory stick (it shouldn't matter -which one GAME* directory to use). - -If you are on 1.5, there is a separate KXploited version for it. -#endif -#ifdef PANDORA -Just copy the .pnd to /pandora/menu or /pandora/desktop. -#endif - -This emulator has lots of options with various tweaks (for improved speed mostly), -but it should have best compatibility in it's default config. If suddenly you -start getting glitches or change something and forget what, use "Restore defaults" -option. - - -How to run Sega/Mega CD games ------------------------------ - -To play any CD game, you need BIOS files. These files must be copied to -#ifdef PANDORA -/pandora/appdata/picodrive/ directory -(if you run PicoDrive once it will create that directory for you). -#else -the same directory as PicoDrive files. -#endif -Files can be named as follows: - -US: us_scd1_9210.bin us_scd2_9306.bin SegaCDBIOS9303.bin -EU: eu_mcd1_9210.bin eu_mcd2_9303.bin eu_mcd2_9306.bin -JP: jp_mcd1_9112.bin jp_mcd1_9111.bin -these files can also be zipped. - -The game must be dumped to CUE+BIN or CUE+ISO format. -ISO/CSO+MP3/WAV is also supported, but may cause problems. -When using CUE/BIN, you must load .cue file from the menu, or else -the emu will not find audio tracks. - - -Other important stuff ---------------------- - -* Sega/Mega CD: If the background music is missing, the CD image format may be - wrong. Currently .cue/bin is recommended. 
Be aware that there are lots of bad - dumps on the web, and some use mp3 format for audio, which often causes - problems (see below). -* While iso/mp3 format is supported, it's not recommended to use. - Some of many problems with mp3 are listed below: - * MP3s may be named incorrectly and will not play. - * The game music may play too fast/too slow/out of sync, which means they - are encoded incorrectly. PicoDrive is not a mp3 player, so all mp3s MUST - be encoded at 44.1kHz stereo. -* Sega/Mega CD: If your games hang at the BIOS screen (with planets shown), - you may be using a bad BIOS dump. Try another from a different source, - like dumping it from your own console. -#ifdef GP2X -* What using mp3s, use lower bitrate for better performance (96 or 128kbps - CBRs recommended). -* GP2X F100/F200: When you use both GP2X CPUs, keep in mind that you can't - overclock as high as when using ARM920 only. For example my GP2X when run - singlecore can reach 280MHz, but with both cores it's about 250MHz. When - overclocked too much, it may start hanging and producing random noise, or - causing ARM940 crashes ("940 crashed" message displayed). -* GP2X F100/F200: Due to internal implementation mp3s must not be larger that - 12MB (12582912 bytes). Larger mp3s will not be fully loaded. -#endif - - -Configuration -------------- - -@@0. "Save slot" -This is a slot number to use for savestates, when done by a button press outside -menu. This can also be configured to be changed with a button -(see "key configuration"). - -@@0. "Frameskip" -How many frames to skip rendering before displaying another. -"Auto" is recommended. - -@@0. "Region" -This option lets you force the game to think it is running on machine from the -specified region, or just to set autodetection order. Also affects Sega/Mega CD. - -@@0. "Show FPS" -Self-explanatory. Format is XX/YY, where XX is the number of rendered frames and -YY is the number of emulated frames per second. - -@@0. 
"Enable sound" -Does what it says. You must enable at least YM2612 or SN76496 (in advanced options, -see below) for this to make sense (already done by default). - -@@0. "Sound Quality" -#ifdef PSP -Sound sample rate, affects sound quality and emulation performance. -22050Hz setting is the recommended one. -#else -Sound sample rate and stereo mode. Mono is not available in Sega/Mega CD mode. -#endif - -@@0. "Confirm savestate" -Allows to enable confirmation on savestate saving (to prevent savestate overwrites), -on loading (to prevent destroying current game progress), and on both or none, when -using shortcut buttons (not menu) for saving/loading. - -@@0. "[Display options]" -Enters Display options menu (see below). - -@@0. "[Sega/Mega CD options]" -Enters Sega/Mega CD options menu (see below). - -@@0. "[32X options]" -Enters 32X options menu (see below). - -@@0. "[Advanced options]" -Enters advanced options menu (see below). - -@@0. "Save cfg as default" -If you save your config here it will be loaded on next ROM load, but only if there -is no game specific config saved (which will be loaded in that case). -You can press left/right to switch to a different config profile. - -@@0. "Save cfg for current game only" -Whenever you load current ROM again these settings will be loaded. - -@@0. "Restore defaults" -Restores all options (except controls) to defaults. - - -Display options ---------------- - -#ifndef PANDORA -@@1. "Renderer" -#ifdef GP2X -8bit fast: -This enables alternative heavily optimized tile-based renderer, which renders -pixels not line-by-line (this is what accurate renderers do), but in 8x8 tiles, -which is much faster. But because of the way it works it can't render any -mid-frame image changes (raster effects), so it is useful only with some games. - -Other two are accurate line-based renderers. The 8bit is faster but does not -run well with some games like Street Racer. 
- -#endif -#ifdef GIZ -This option allows to switch between 16bit and 8bit renderers. The 8bit one is -a bit faster for some games, but not much, because colors still need to be -converted to 16bit, as this is what Gizmondo requires. It also introduces -graphics problems for some games, so it's best to use 16bit one. - -#endif -#ifdef PSP -This option allows to switch between fast and accurate renderers. The fast one -is much faster, because it draws the whole frame at a time, instead of doing it -line by line, like the accurate one does. But because of the way it works it -can't render any mid-frame image changes (raster effects), so it is useful only -for some games. - -#endif -#endif -#ifdef GP2X -@@1. "Tearing Fix" -Wiz only: works around the tearing problem by using portrait mode. Causes ~5-10% -performance hit, but eliminates the tearing effect. - -@@1. "Gamma correction" -F100/F200 only: Alters image gamma through GP2X hardware. Larger values make -image to look brighter, lower - darker (default is 1.0). - -@@1. "Vsync" -This one adjusts the LCD refresh rate to better match game's refresh rate and -starts synchronizing rendering with it. Should make scrolling smoother and -eliminate tearing on F100/F200. -#endif -#ifdef GIZ -@@1. "Scanline mode" -This option was designed to work around slow framebuffer access (the Gizmondo's -main bottleneck) by drawing every other line (even numbered lines only). -This improves performance greatly, but looses detail. - -@@1. "Scale low res mode" -The Genesis/Megadrive had several graphics modes, some of which were only 256 -pixels wide. This option scales their width to 320 by using simple -pixel averaging scaling. Works only when 16bit renderer is enabled. - -@@1. "Double buffering" -Draws the display to offscreen buffer, and flips it with visible one when done. -Unfortunately this causes serious tearing, unless v-sync is used (next option). - -@@1. 
"Wait for V-sync" -Waits for vertical sync before drawing (or flipping buffers, if previous option -is enabled). Emulation is stopped while waiting, so this causes large performance -hit. -#endif -#ifdef PSP -@@1. "Scale factor" -This allows to resize the displayed image by using the PSP's hardware. The number is -used to multiply width and height of the game image to get the size of image to be -displayed. If you just want to make it fullscreen, just use "Set to fullscreen" -setting below. - -@@1. "Hor. scale (for low res. games)" -This one works similarly as the previous setting, but can be used to apply additional -scaling horizontally, and is used for games which use lower (256 pixel wide) Gen/MD -resolution. - -@@1. "Hor. scale (for hi res. games)" -Same as above, only for higher (320 pixel wide) resolution using games. - -@@1. "Bilinear filtering" -If this is enabled, PSP hardware will apply bilinear filtering on the resulting image, -making it smoother, but blurry. - -@@1. "Gamma adjustment" -Color gamma can be adjusted with this. - -@@1. "Black level" -This can be used to reduce unwanted "ghosting" effect for dark games, by making -black pixels brighter. Use in conjunction with "gamma adjustment" for more effect. - -@@1. "Wait for v-sync" -Wait for the screen to finish updating before switching to next frame, to avoid tearing. -There are 3 options: -* never: don't wait for vsync. -* sometimes: wait only if emulator is running fast enough. -* always: always wait (causes emulation slowdown). - -@@1. "Set to unscaled centered" -Adjust the resizing options to set game image to it's original size. - -@@1. "Set to 4:3 scaled" -Scale the image up, but keep 4:3 aspect, by adding black borders. - -@@1. "Set to fullscreen" -Adjust the resizing options to make the game image fullscreen. -#endif -#ifdef PANDORA -Allows to set up scaling, filtering and vertical sync. -#endif - - -Sega/Mega CD options --------------------- - -@@2. 
"CD LEDs" -The Sega/Mega CD unit had two blinking LEDs (red and green) on it. This option -will display them on top-left corner of the screen. - -@@2. "CDDA audio" -This option enables CD audio playback. - -@@2. "PCM audio" -This enables 8 channel PCM sound source. It is required for some games to run, -because they monitor state of this audio chip. - -@@2. "ReadAhead buffer" -This option can prefetch more data from the CD image than requested by game -(to avoid accessing card later), what can improve performance in some cases. -#ifndef PSP -"OFF" is the recommended setting. -#endif - -@@2. "Save RAM cart" -Here you can enable 64K RAM cart. Format it in BIOS if you do. - -@@2. "Scale/Rot. fx" -The Sega/Mega CD had scaling/rotation chip, which allows effects similar to -"Mode 7" effects in SNES. On slow systems like GP2X, disabling may improve -performance but cause graphical glitches. - - -32X options ------------ - -@@3. "32X enabled" -Enables emulation of addon. Option only takes effect when ROM is reloaded. - -#ifdef GP2X -@@3. "32X renderer" -This currently only affects how the Genesis/MD layers are rendered, which is -same as "Renderer" in display options. - -#endif -@@3. "PWM sound" -Emulates PWM sound portion of 32X hardware. Disabling this may greatly improve -performance for games that dedicate one of SD2s for sound, but will cause -missing sound effects and instruments. - -@@3. "Master SH2 cycles" / "Slave SH2 cycles" -This allows underclocking the 32X CPUs for better emulation performance. The -number has the same meaning as cycles in DOSBox, which is cycles per millisecond. -Underclocking too much may cause various in-game glitches. - - -Advanced configuration ----------------------- - -@@4. "Use SRAM/BRAM savestates" -This will automatically read/write SRAM (or BRAM for Sega/Mega CD) savestates for -games which are using them. SRAM is saved whenever you enter the menu or exit the -emulator. - -@@4. 
"Disable sprite limit" -The MegaDrive/Genesis had a limit on how many sprites (usually smaller moving -objects) can be displayed on single line. This option allows to disable that -limit. Note that some games used this to hide unwanted things, so it is not -always good to enable this option. - -@@4. "Emulate Z80" -Enables emulation of Z80 chip, which was mostly used to drive the other sound chips. -Some games do complex sync with it, so you must enable it even if you don't use -sound to be able to play them. - -@@4. "Emulate YM2612 (FM)" -This enables emulation of six-channel FM sound synthesizer chip, which was used to -produce sound effects and music. - -@@4. "Emulate SN76496 (PSG)" -This enables emulation of PSG (programmable sound generation) sound chip for -additional effects. - -Note: if you change sound settings AFTER loading a ROM, you may need to reset -game to get sound. This is because most games initialize sound chips on -startup, and this data is lost when sound chips are being enabled/disabled. - -@@4. "gzip savestates" -This will always apply gzip compression on your savestates, allowing you to -save some space and load/save time. - -@@4. "Don't save last used ROM" -This will disable writing last used ROM to config on exit (what might cause SD -card corruption according to DaveC). - -@@4. "Disable idle loop patching" -Idle loop patching is used to improve performance, but may cause compatibility -problems in some rare cases. Try disabling this if your game has problems. - -@@4. "Disable frame limiter" -This allows games to run faster then 50/60fps, useful for benchmarking. - -#ifdef GP2X -@@4. "Use ARM940 core for sound" -F100/F200: This option causes PicoDrive to use ARM940T core (GP2X's second CPU) -for sound (i.e. to generate YM2612 samples) to improve performance noticeably. -It also decodes MP3s in Sega/Mega CD mode. - -#endif -@@4. 
"SVP dynarec" -This enables dynamic recompilation for SVP chip emulated for Virtua Racing game, -what improves it's emulation performance greatly. - - -Key configuration ------------------ - -Select "Configure controls" from the main menu. Then select "Player 1" and you will -see two columns. The left column lists names of Genesis/MD controller buttons, and -the right column your handheld ones, which are assigned. - -There is also option to enable 6 button pad (will allow you to configure XYZ -buttons), and an option to set turbo rate (in Hz) for turbo buttons. - - -Cheat support -------------- - -To use GG/patch codes, you must type them into your favorite text editor, one -per line. Comments may follow code after a whitespace. Only GameGenie and -Genecyst patch formats are supported. -Examples: - -Genecyst patch (this example is for Sonic): - -00334A:0005 Start with five lives -012D24:0001 Keep invincibility until end of stage -009C76:5478 each ring worth 2 -009C76:5678 each ring worth 3 -... - -Game Genie patch (for Sonic 2): - -ACLA-ATD4 Hidden palace instead of death egg in level select -... - -Both GG and patch codes can be mixed in one file. - -When the file is ready, name it just like your ROM file, but with additional -.pat extension, making sure that case matches. - -Examples: - -ROM: Sonic.zip -PATCH FILE: Sonic.zip.pat - -ROM: Sonic 2.bin -PATCH FILE: Sonic 2.bin.pat - -Put the file into your ROMs directory. Then load the .pat file as you would -a ROM. Then Cheat Menu Option should appear in main menu. - - -What is emulated? ------------------ - -Genesis/MegaDrive: -#ifdef PSP -main 68k @ 7.6MHz: yes, FAME/C core -z80 @ 3.6MHz: yes, CZ80 core -#else -main 68k @ 7.6MHz: yes, Cyclone core -z80 @ 3.6MHz: yes, DrZ80 core -#endif -VDP: yes, except some quirks and modes not used by games -YM2612 FM: yes, optimized MAME core -SN76489 PSG: yes, MAME core -SVP chip: yes! This is first emu to ever do this. -Some in-cart mappers are also supported. 
- -Sega/Mega CD: -#ifdef PSP -another 68k @ 12.5MHz: yes, FAME/C too -#else -another 68k @ 12.5MHz: yes, Cyclone too -#endif -gfx scaling/rotation chip (custom ASIC): yes -PCM sound source: yes -CD-ROM controller: yes (mostly) -bram (internal backup RAM): yes - -32X: -2x SH2 @ 23MHz: yes, custom recompiler -Super VDP: yes -PWM: yes - - -Problems / limitations ----------------------- - -#ifdef PSP -* SVP emulation is terribly slow. -#endif -* Various VDP modes and quirks (window bug, scroll size 2, etc.) are not - emulated, as very few games use this (if any at all). -* The emulator is not 100% accurate, so some things may not work as expected. -* The FM sound core doesn't support all features and has some accuracy issues. - - -Credits -------- - -This emulator is made of the code from following people/projects: - -notaz -GP2X, UIQ, PSP, Gizmondo ports, CPU core hacks, dynamic recompilers, -lots of additional coding (see changelog). -Homepage: http://notaz.gp2x.de/ - -fDave -one who started it all: -Cyclone 68000 core and PicoDrive itself - -Chui -FAME/C 68k interpreter core -(based on C68K by Stephane Dallongeville) - -Stephane Dallongeville (written), NJ (optimized) -CZ80 Z80 interpreter core - -Reesy & FluBBa -DrZ80, the Z80 interpreter written in ARM assembly. -Homepage: http://reesy.gp32x.de/ (defunct) - -Tatsuyuki Satoh, Jarek Burczynski, MAME development -software implementation of Yamaha FM sound generator - -MAME development -Texas Instruments SN76489 / SN76496 programmable tone/noise generator -Homepage: http://www.mame.net/ - -Eke -CD graphics processor implementation (from Genesis Plus GX) - -Stephane Dallongeville -Gens, MD/Mega CD/32X emulator. Some Sega CD code is based on this emu. -#ifdef PSP - -people @ ps2dev.org forums / PSPSDK crew -libaudiocodec code (by cooleyes) -other sample code -#endif - - -Additional thanks ------------------ - -* Charles MacDonald (http://cgfm2.emuviews.com/) for old but still very useful - info about genesis hardware. 
-* Steve Snake for all that he has done for Genesis emulation scene. -* Tasco Deluxe for his reverse engineering work on SVP and some mappers. -* Bart Trzynadlowski for his SSFII and 68000 docs. -* Haze for his research (http://haze.mameworld.info). -* Lordus, Exophase and Rokas for various ideas. -* Nemesis for his YM2612 research. -* Eke -* Many posters at spritesmind.net forums for valuable information. -* Mark and Jean-loup for zlib library. -* ketchupgun for the skin. -#ifdef GP2X -* rlyeh and all the other people behind the minimal library. -* Squidge for his famous squidgehack(tm). -* Dzz for his ARM940 sample code. -* A_SN for his gamma code. -* craigix for supplying the GP2X hardware and making this port possible. -* Alex for the icon. -* All the people from gp32x boards for their support. -#endif -#ifdef GIZ -* Kingcdr's for the SDK and Reesy for the DLL and sound code. -* jens.l for supplying the Gizmondo hardware and making this port possible. -#endif -* Inder for some graphics. -* Anyone else I forgot. You know who you are. - - -Changelog ---------- -1.90 (2013-09-24) - + 32X+CD emulation has been implemented. - + CD graphics processor code has been replaced with much cleaner Eke's - implementation from Genesis Plus GX. - + CD PCM code has been completely rewritten. - * Various CD compatibility issues have been solved. Hopefully no more - regressions this time. - * pandora: fixed tv-out (again), added automatic layer switching - * libretro: fixed crackling sound for some games, added some core options - -1.85 (2013-08-31) - * Lots of 32X compatibility and accuracy improvements. All commercial games - are booting now, but some still have issues. - * Fixed some regressions in MegaCD code, like hang in jap BIOS. - * Implemented pause for SMS. - * Updated UI with improvements from PCSX ReARMed. - * Frontend timing has been rewritten, should no longer slowly desync from - LCD on pandora. 
- * Added libretro and SDL 32/64bit ports, fixed compatibility issues with - Android, iOS. - * Various other things I forgot (it has been a while since last release..) - -1.80 (2010-09-19) - + Added Caanoo support. Now the GP2X binary supports GP2X F100/F200, Wiz - and Caanoo. Lots of internal refactoring to support this. - + Enabled 32X and SMS code. It's still unfinished but better release something - now than wait even more (it has been in development for more then a year now - due to various other projects or simply lack of time). - + Pandora: added hardware scaler support, including ability to resize the - layer and control filtering. - + GP2X: Added basic line-doubling vertical scaling option. - * Changed the way keys are bound, no need to unbind old one any more. - * Handle MP3s with ID3 tags better (some MP3s with ID3 did not play). - * Improved shadow/hilight color levels. - * Fixed broken cheat support. - -1.80beta2 - * Pandora: updated documentation. - -1.80beta1 (2010-06-02) - + Added pandora port. - * Internal refactoring for 32x/SMS support. - * Move mapper database to external file. - + Added preliminary SMS emulation. - + Added emulation of 32x peripherals including VDP. More work is needed here. - + ARM: Added new SH2 recompiler for 32x. Some unification with SVP one. - - Disabled most of the above bacause I'm not yet happy with the results. - -1.56 (2009-09-19) - * Changed sync in Sega CD emulation again. Should fix games that - broke after changes in 1.51a. - * Fixed default keys rebinding when they shouldn't. - * Fixed sram being loaded from wrong game. - * Emu should no longer hang shortly after using fast-forward. - * Fixed save states sometimes no longer showing up in save state menu. - * ARM: some asm code refactoring for slight speed improvement. - -1.55 - + Added Wiz support. Now the same GP2X binary supports F100/F200 and Wiz. - * Changed shadow/hilight handling a bit, fixes some effects in Pirates! Gold. - * Complete input code rewrite. 
This fixes some limitations like not allowing - to control both players using single input device. It also allows to use - more devices (like keyboards) on Linux based devices. - * Options menu has been reordered, "restore defaults" option added. - -1.51b - * Fixed a crash when uncompressed savestate is loaded. - * Fixed an idle loop detection related hanging problem. - * PSP: fixed another palette related regression. - * UIQ3: updated frontend for the latest emu core. - -1.51a - * Fixed a sync problem between main and sub 68k. Should fix the hanging - problem for some games. - * ARM: fixed a crash when CD savestate is loaded just after loading ROM. - -1.51 - * Improved bin_to_cso_mp3 tool, it should no longer complain about - missing lame.exe even if it's in working dir. - * Fixed a regression from 1.50, which caused slowdowns in Final Fight. - * Fixed some regressions from 1.50 related to sprite limit and palette - handling (caused graphical glitches in some games). - + Added ABC turbo actions to key config. - * Some other minor adjustments. - -1.50 - + Added some basic support for Sega Pico, a MegaDrive-based toy. - + Added proper support for cue/bin images, including cdda playback. - .cue sheets with iso/cso/mp3/wav files listed in them are now - supported too (but 44kHz restriction still applies). - + Added bin_to_cso_mp3 tool, based on Exophase's bin_to_iso_ogg. - The tool can convert .cue/.bin Sega CD images to .cso/.mp3. - * Greatly improved Sega CD load times. - * Changed how scheduling between 68k and z80 is handled. Improves - performance for some games. Credits to Lordus for the idea. - * YM2612 state was not 100% saved, this should be better now. - * Improved renderer performance for shadow/hilight mode. - * Added a hack for YM2612 frequency overflow issue (bleep noises - in Shaq Fu, Spider-Man - The Animated Series (intro music), etc.) - Credits to Nemesis @ spritesmind forum. Works only if sound rate - is set to 44kHz. 
- + Implemented some sprite rendering improvements, as suggested by - Exophase. Games with lots of sprites now perform better. - + Added better idle loop detection, based on Lordus' idea again. - - "accurate timing" option removed, as disabling it no longer - improves performance. - - "accurate sprites" was removed too, the new sprite code can - properly handle sprite priorities in all cases. - * Timers adjusted again. - * Improved .smd detection code. - * ARM: fixed a bug in DrZ80 core, which could cause problems in - some rare cases. - * ARM: fixed a problem of occasional clicks on MP3 music start. - * Minor general optimizations and menu improvements. - * Fixed a bug in Sega CD savestate loader, where the game would - sometimes crash after load. - * Fixed a crash of games using eeprom (introduced in 1.40b). - * PSP: fixed suspend/resume (hopefully for real). - -1.40c - * Fixed a problem with sound in Marble Madness. - * GP2X: Fixed minor problem with key config. - -1.40b - * Fixed sprite masking code. Thanks to Lordus for explaining how it works. - + Added "disable sprite limit" option. - + PSP: added black level adjustment to display options. - * Changed reset to act as 'soft' reset. - + Added detection for Puggsy (it doesn't really have sram). - * Some small timing adjustments. - -1.40a - * GP2X: Fixed a binding problem with up and down keys. - * Default game config no longer overrides global user config. - -1.40 - + Added support for SVP (Sega Virtua Processor) to emulate Virtua Racing, - wrote ARM recompiler and some HLE code for VR. Credits to Exophase and - Rokas for various ideas. - * Changed config file format, files are now human-readable. Game specific - configs are now held in single file (but old game config files are still - read when new one is missing). - * Fixed a bug where some key combos didn't work as expected. - * Fixed a regression in renderer (ARM ports only, some graphic glitches in - rare cases). 
- * Adjusted fast renderer to work with more games, including VR. - * Fixed a problem where SegaCD RAM cart data was getting lost on reset. - * GP2X: Greatly reduced SegaCD FMV game slowdowns by disabling read-ahead - in the Linux kernel and C library (thanks to Rokas and Exophase for ideas - again). Be sure to keep "ReadAhead buffer" OFF to avoid slowdowns. - + PicoDrive now comes with a game config file for some games which need - special settings, so they should now work out-of-the-box. More games will - be added with later updates. - + GP2X: Files now can be deleted by pressing A+SELECT in the file browser. - -1.35b - * PSP: mp3 code should no longer fail on 1.5 firmware. - + PSP: added gamma adjustment option. - + Added .cso ISO format support. Useful for non-FMV games. - * It is now possile to force a region after the ROM is loaded. - * Fixed a sram bug in memhandlers (fixes Shining in the Darkness saves). - * PSP: fixed another bug in memhanlers, which crashed the emu for some games - (like NBA Jam and NHL 9x). - + PSP: added suspend/resume handling for Sega CD games. - + GP2X: added additional low volume levels for my late-night gaming sessions - (in stereo mode only). - + GP2X: added "fast forward" action in key config. Not recommended to use for - Sega CD, may case problems there. - * Some other small tweaks I forgot about. - -1.35a - * PSP: fixed a bug which prevented to load any ROMs after testing the BIOS. - * PSP: fixed incorrect CZ80 memory map setup, which caused Z80 crashes and - graphics corruption in EU Mega CD model1 BIOS menus. - + PSP: added additional "set to 4:3 scaled" display option for convenience. - + PSP: Added an option to disable frame limitter (works only with non-auto frameskip). - -1.35 - + PSP port added. Lots of new code for it. Integrated modified FAME/C, CZ80 cores. - + Some minor generic optimizations. - * Patched some code which was crashing under PSP, but was working in GP2X/Giz - (although it should have crashed there too). 
- * Readme updated. - -1.34 - + Gizmondo port added. - + Some new optimizations in memory handlers, and for shadow/hilight mode. - + Added some hacks to make more games work without enabling "accurate timing". - * Adjusted timing for "accurate timing" mode and added preliminary VDP FIFO - emulation. Fixes Double Dragon 2, tearing in Chaos Engine and some other games. - * Fixed a few games not having sound at startup. - * Updated serial EEPROM code to support more games. Thanks to EkeEke for - providing info about additional EEPROM types and game mappers. - * The above change fixed hang of NBA Jam. - * Minor adjustments to control configurator. - -1.33 - * Updated Cyclone core to 0.0088. - + Added A r k's usbjoy fix. - + Added "perfect vsync" option, which adjusts GP2X LCD refresh rate and syncs - emulation to it to eliminate tearing and ensure smoothest scrolling possible. - + Added an option to use A_SN's gamma curve for gamma correction (improves dark - and bright color display for mk2s). - * Sometimes stray sounds were played after loading a savestate. Fixed. - * Fixed a problem where >6MB mp3s were corrupted in memory (sound glitches in - Snatcher). - * PD no longer overwrites video player code in memory, video player now can be - used after exiting PicoDrive. - * Fixed a bug which was causing Sonic 3 code to deadlock in some rare conditions - if "accurate timing" was not enabled. - * Fixed support for large hacked ROMs like "Ultimate Mortal Kombat Trilogy". - Upto 10MB hacked ROMs are supported now. - + Config profiles added (press left/right when saving config). - * Changed key configuration behavior to the one from gpfce (should be more - intuitive). - + Added some skinning capabilities to the menu system with default skin by - ketchupgun. Delete skin directory if you want old behaviour. - * Some other little tweaks I forgot about. - -1.32 - + Added some new scaling options. 
- + Added ability to reload CD images while game is running (needed for games - with multiple CDs, like Night Trap). - + Added RAM cart emulation. - * Fixed DMA timing emulation (caused lock-ups for some genesis games). - * Idle loop detection was picking up wrong code and causing glitches, fixed. - * The ym2612 code on 940 now can handle multiple updates per frame - (fixes Thunger Force III "seiren" level drums for example). - * Memory handlers were ignoring some writes to PSG chip, fixed (missing sounds in - Popful Mail, Silpheed). - * Improved z80 timing, should fix some sound problems. - * Fixed a bug with sram register (fixes Phantasy Star 4). - * ROM loader was incorrectly identifying some ROMs as invalid. Fixed. - * Added code for PRG ram write protection register (Dungeon Explorer). - * The memory mode register change in 1.31 was unsafe and caused some glitches in - AH-3 Thunderstrike. Fixed. - * Fixed a file descriptor leak. - * Updated documentation, added Gmenu2x manual. - -1.31 - * Changed the way memory mode register is read (fixes Lunar 2, broken in 1.30). - * Fixed TAS opcode on sub-68k side (fixes Batman games). - * File browser now filters out mp3s, saves and some other files, which are not ROMS. - -1.30 - + ISO files now can be zipped. Note that this causes VERY long loading times. - + Added data pre-buffering support, this allows to reduce frequency of short pauses - in FMV games (caused by SD access), but makes those pauses longer. - * Fixed PCM DMA transfers (intro FMV in Popful Mail). - + Properly implemented "decode" data transformation (Jaguar XJ220). - * Integrated "better sync" code into cyclone code, what made this mode much faster. - * Fixed a bug related to game specific config saving. - * Frameskipper was skipping sound processing, what caused some audio desyncs. Fixed. - * Fixed reset not working for some games. - + New assembly optimized memory handlers for CD (gives at least a few fps). 
- Also re-enabled all optimizations from 0.964 release. - + New idle-loop detection code for sub-68k. Speeds up at least a few games. - -1.201 - + Added basic cheat support (GameGenie and Genecyst patches). - -1.20 - * Fixed a long-standing problem in audio mixing code which caused slight distortions - at lower sample rates. - * Changed the way 920 and 940 communicates (again), should be more reliable and give - slight performance increase. - * Some optimizations in audio mixing code. - * Some menu changes (background added, smaller font in ROM browser, savestate loader - now can select slots). - + 1M mode DMA transfers implemented (used by FMV games like Night Trap and Sewer Shark). - + Games now can run code from WORD RAM in 1M mode (fixes Adventures of Willy Beamish). - + "Cell arrange" address mapping is now emulated (Heart of the alien). - + "Color numeric operation" is now emulated (text in Lunar 2, Silpheed intro graphics). - + "Better sync" option added (prevents some games from hanging). - -1.14 - + Region autodetection now can be customized. - * When CDDA music tracks changed, old buffer contents were incorrectly played. Fixed. - * BRAM is now automatically formatted (no need to enter BIOS menu and format any more). - * Games now can be reset, CDDA music no longer breaks after loading another ISO. - * Fixed a race condition between 920 and 940 which sometimes caused CDDA music not to play. - + Savestates implemented for Sega/Mega CD. - + PCM sound added. - * Some mixer code rewritten in asm. 22kHz and 11kHz sound rates are now supported in - Mega CD mode (but mp3s must still be 44kHz stereo). - + Timer emulation added. - * CDC DMA tansfers fixed. Snatcher and probably some more games now boot. - * 2M word RAM -> VDP transfers fixed, no more corruption in Ecco and some other games. - -1.10 - + GP2X: Added experimental Sega CD support. - + GP2X: Added partial gmv movie playback support. 
- -0.964 (2006-12-03) - * GP2X: Fixed a sound buffer underflow issue on lower sample rate modes, which was - happening for NTSC games and causing sound clicks. - * GP2X: Redone key config to better support USB joysticks (now multiple joysticks - should be useable and configurable). - + GP2X: Added save confirmation option. - + GP2X: Added 940 CPU crash detection. - + ALL: UIQ3 port added. - -0.963 - * GP2X: Gamma-reset-on-entering-menu bug fixed. - * GP2X: Recompiled PicoDrive with gcc profiling option set as described here: - http://www.gp32x.com/board/index.php?showtopic=28490 - -0.962 - * GP2X: Fixed an issue with incorrect sounds in some games when dualcore operation - was enabled (for example punch sound in SOR). - * GP2X: Limited max volume to 90, because higher values often cause distortions. - * GP2X: Fixed a bug with lower res scaling. - * GP2X: Gamma is now reset on exit. - -0.96 - * ALL: Severely optimized MAME's YM2612 core, part of it is now rewritten in asm. - + GP2X: The YM2612's code now can be run in GP2X's ARM940T CPU, what causes large - performance increase. - * ALL: Accurate renderers are slightly faster now. - + GP2X: Using quadruple buffering instead of doublebuffer now, also updated - framelimitter, this should eliminate some scrolling and tearing problems. - * GP2X: Fixed some flickering issues of 8bit accurate renderer. - + GP2X: craigix's RAM timings now can be enabled in the menu (see advanced options). - + GP2X: Added ability to save config for specific games only. - + GP2X: Gamma control added (using GP2X's hardware capabilities for this). - * GP2X: Volume keys are now configurable. - + GP2X: GnoStiC added USB joystick support, I made it possible to use it for - player 2 control (currently untested). - * GP2X: squidgehack is now applied through kernel module (cleaner way). - -0.95 - * ALL: Fixed a bug in sprite renderer which was causing slowdowns for some games. 
- + GP2X: Added command line support - + GP2X: Added optional hardware scaling for lower-res games like Shining Force. - * ALL: Sound chips are now sampled 2 times per frame. This fixed some games which - had missing sounds (Vectorman 2 1st level, Thunder Force 3 water level, - etc.). - + ALL: Added another accurate 8-bit renderer which is slightly faster and made it - default. - -0.945 - + GP2X: Added frame limiter for frameskipped modes. - * GP2X: Increased brightness a bit (unused pixel bits now also contain data). - * GP2X: Suidgehack was not applied correctly (was applied before allocating some - high memory and had no effect). - -0.94 - + Added GP2X port. - * Improved interrupt timing, Mazin Saga and Burning Force now works. - * Rewritten renderer code to better suit GP2X, should be faster on other - ports too. - + Added support for banking used by 12-in-1 and 4-in-1 ROMs (thanks Haze). - + Added some protection device faking, used by some unlicensed games like - Super Bubble Bobble, King of Fighters, Elf Wor, ... (thanks to Haze again) - + Added primitive Virtua Racing SVP faking, so menus can be seen now. - -0.93 - * Fixed a problem with P900/P910 key configuration in FC mode. - * Improved shadow/hilight mode emulation. Still not perfect, but should be - enough for most games. - + Save state slots added. - + Region selector added. - -0.92 - VDP changes: - * VDP emulation is now more accurate (fixes flickering in Chase HQ II, - Super Hang-On and some other problems in other games). - * HV counter emulation is now much more accurate. Fixes the Asterix games, - line in Road Rash 3, etc. - * Minor sprite and layer scroll masking bugs fixed. - + Added partial interlace mode renderer (Sonic 2 vs mode) - * Fixed a crash in both renderers when certain size window layers were used. - + Added emulation of shadow/hilight operator sprites. Other shadow/hilight - effects are still unemulated. - + Sprite emulation is more accurate, sprite limit is emulated. 
- + Added "accurate sprites" option, which always draws sprites in correct - order and emulates sprite collision bit, but is significantly slower. - - Emulation changes: - * Improved interrupt handling, added deferred interrupt emulation - (Lemmings, etc). - + Added serial EEPROM SRAM support (Wonder Boy in Monster World, - Megaman - The Wily Wars and many EA sports games like NBA Jam). - + Implemented ROM banking for Super Street Fighter II - The New Challengers - * Updated to the latest version of DrZ80 core, integrated memory handlers - in it for better performance. A noticeable performance increase, but save - states may not work from the previous version (you can only use them with - sound disabled in that case). - + SRAM word read handler was using incorrect byte order, fixed. - - Changes in Cyclone 0.0086: - + Added missing CHK opcode handler (used by SeaQuest DSV). - + Added missing TAS opcode handler (Gargoyles,Bubba N Stix,...). As in real genesis, - memory write-back phase is ignored (but can be enabled in config.h if needed). - + Added missing NBCD and TRAPV opcode handlers. - + Added missing addressing mode for CMP/EOR. - + Added some minor optimizations. - - Removed 216 handlers for 2927 opcodes which were generated for invalid addressing modes. - + Fixed flags for ASL, NEG, NEGX, DIVU, ADDX, SUBX, ROXR. - + Bugs fixed in MOVEP, LINK, ADDQ, DIVS handlers. - * Undocumented flags for CHK, ABCD, SBCD and NBCD are now emulated the same way as in Musashi. - + Added Uninitialized Interrupt emulation. - + Altered timing for about half of opcodes to match Musashi's. - -0.80 - * Nearly all VDP code was rewritten in ARM asm. Gives ~10-25% performance - increase (depends on game). - * Optimized 32-column renderer not to render tiles offscreen, games which - use 32-column display (like Shining Force) run ~50% faster. 
- + Added new "Alternative renderer", which gives another ~30-45% performance - increase (in addition to mentioned above), but works only with some games, - because it is missing some features (it uses tile-based rendering - instead of default line-based and disables H-ints). - + Added "fit2" display mode for all FC gamers. It always uses 208x146 for - P800 and 208x208 for all other phones. - + Added volume control for Motorolas (experimental). - - VDP changes: - + Added support for vertical window (used by Vapor Trail, Mercs, GRIND - Stormer and others). - + Added sprite masking (hiding), adds some speed. - + Added preliminary H counter emulation. Comix Zone and Sonic 3D Blast - special stage are now playable. - + Added column based vertical scrolling (Gunstar Heroes battleship level, - Sonic and Knuckles lava boss, etc). - - Emulation changes: - + Re-added and improved Z80 faking when Z80 is disabled. Many games now can - be played without enabling Z80 (Lost Vikings, Syndicate, etc), but some - still need it (International Superstar Soccer Deluxe). - * Improved ym2612 timers, Outrun music plays at correct speed, voices in - Earthworm Jim play better, more games play sound. - * I/O registers now remember their values (needed for Pirates! Gold) - + Added support for 6 button pad. - - Changes in Cyclone 0.0083wip: - + Added missing CHK opcode (used by SeaQuest DSV). - + Added missing TAS opcode (Gargoyles). As in real genesis, write-back phase - is ignored (but is enabled for other systems). - - Backported stuff from Snes9x: - * Fixed Pxxx jog up/down which were not working in game. - + Added an option to gzip save states to save space. - + The emulator now pauses whenever it is loosing focus, so it will now pause - when alarm/ponecall/battery low/... windows come up. - - Removed 'pause on phonecall' feature, as it is no longer needed. - + Video fix for asian A1000s. 
- -0.70 - * Started using tools from "Symbian GCC Improvement Project", which give - considerable speed increase (~4fps in "center 90" mode). - * Rewrote some drawing routines in ARM assembly (gives ~6 more fps in - "center 90" mode). - * Minor improvement to 0 and 180 "fit" modes. Now they look slightly better - and are faster. - * Minor stability improvements (emulator is less likely to crash). - + Added some background for OSD text for better readability. - + Added Pal/NTSC detection. This is needed for proper sound speed. - + Implemented Reesy's DrZ80 Z80 emu. Made some changes to it with hope to make - it faster. - + Implemented ym2612 emu from the MAME project. Runs well but sometimes sounds - a bit weird. Could be a little faster, so made some changes too. - + Implemented SN76489 emu from the MAME project. - + Added two separate sound output methods (mediaserver and cmaudiofb) with - autodetection (needs testing). - * Fixed VDP DMA fill emulation (as described in Charles MacDonald's docs), - fixes Contra and some other games. - -0.301 - Launcher: - * Launcher now starts emulation process from current directory, - not from hardcoded paths. - * Improved 'pause on call' feature, should hopefully work with Motorola phones. - -0.30 (2006-01-07) - Initial release. - - -License -------- - -This program and it's code is released under the terms of MAME license: - - Redistribution and use of this code or any derivative works are permitted - provided that the following conditions are met: - - * Redistributions may not be sold, nor may they be used in a commercial - product or activity. - - * Redistributions that are modified from the original source must include the - complete source code, including the source code for all components used by a - binary built from the modified sources. 
However, as a special exception, the - source code distributed need not include anything that is normally distributed - (in either source or binary form) with the major components (compiler, kernel, - and so on) of the operating system on which the executable runs, unless that - component itself accompanies the executable. - - * Redistributions must reproduce the above copyright notice, this list of - conditions and the following disclaimer in the documentation and/or other - materials provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - POSSIBILITY OF SUCH DAMAGE. - -SEGA/Genesis/MegaDrive/SEGA-CD/Mega-CD/32X are trademarks of -Sega Enterprises Ltd. 
- +#include "../README.md" diff --git a/platform/common/arm_utils.h b/platform/common/arm_utils.h index 21204c92..213186e5 100644 --- a/platform/common/arm_utils.h +++ b/platform/common/arm_utils.h @@ -1,9 +1,10 @@ -void bgr444_to_rgb32(void *to, void *from); +void bgr444_to_rgb32(void *to, void *from, unsigned entries); void bgr444_to_rgb32_sh(void *to, void *from); -void vidcpy_m2(void *dest, void *src, int m32col, int with_32c_border); -void vidcpy_m2_rot(void *dest, void *src, int m32col, int with_32c_border); +void vidcpy_8bit(void *dest, void *src, int x_y, int w_h); +void vidcpy_8bit_rot(void *dest, void *src, int x_y, int w_h); + void spend_cycles(int c); // utility void rotated_blit8 (void *dst, void *linesx4, int y, int is_32col); diff --git a/platform/common/arm_utils.s b/platform/common/arm_utils.s index 9e8d9f25..e1d9a5a5 100644 --- a/platform/common/arm_utils.s +++ b/platform/common/arm_utils.s @@ -1,6 +1,7 @@ /* * some color conversion and blitting routines * (C) notaz, 2006-2009 + * (C) irixxxx, 2020-2023 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -9,10 +10,10 @@ .text .align 4 -@ Convert 0000bbb0 ggg0rrr0 0000bbb0 ggg0rrr0 -@ to 00000000 rrr00000 ggg00000 bbb00000 ... +@ Convert 0000bbbb ggggrrrr 0000bbbb ggggrrrr +@ to 00000000 rrrr0000 gggg0000 bbbb0000 ... 
-@ lr = 0x00e000e0, out: r3=lower_pix, r2=higher_pix; trashes rin +@ lr = 0x00f000f0, out: r3=lower_pix, r2=higher_pix; trashes rin .macro convRGB32_2 rin sh=0 and r2, lr, \rin, lsr #4 @ blue and r3, \rin, lr @@ -25,13 +26,13 @@ mov r3, r3, ror #17 @ shadow mode .elseif \sh == 2 adds r3, r3, #0x40000000 @ green - orrcs r3, r3, #0xe0000000 + orrcs r3, r3, lr, lsl #24 mov r3, r3, ror #8 adds r3, r3, #0x40000000 - orrcs r3, r3, #0xe0000000 + orrcs r3, r3, lr, lsl #24 mov r3, r3, ror #16 adds r3, r3, #0x40000000 - orrcs r3, r3, #0xe0000000 + orrcs r3, r3, lr, lsl #24 mov r3, r3, ror #24 .else mov r3, r3, ror #16 @ r3=low @@ -47,41 +48,38 @@ .elseif \sh == 2 mov r2, r2, ror #8 adds r2, r2, #0x40000000 @ blue - orrcs r2, r2, #0xe0000000 + orrcs r2, r2, lr, lsl #24 mov r2, r2, ror #8 adds r2, r2, #0x40000000 - orrcs r2, r2, #0xe0000000 + orrcs r2, r2, lr, lsl #24 mov r2, r2, ror #8 adds r2, r2, #0x40000000 - orrcs r2, r2, #0xe0000000 + orrcs r2, r2, lr, lsl #24 mov r2, r2, ror #8 .endif orr r2, r2, r2, lsr #3 -.if \sh == 1 - str r2, [r0, #0x40*2*4] -.endif str r2, [r0], #4 .endm -.global bgr444_to_rgb32 @ void *to, void *from +.global bgr444_to_rgb32 @ void *to, void *from, unsigned entries bgr444_to_rgb32: stmfd sp!, {r4-r7,lr} - mov r12, #0x40>>3 @ repeats - mov lr, #0x00e00000 - orr lr, lr, #0x00e0 + mov r12, r2, lsr #3 @ repeats + mov lr, #0x00f00000 + orr lr, lr, #0x00f0 .loopRGB32: - subs r12, r12, #1 - ldmia r1!, {r4-r7} convRGB32_2 r4 convRGB32_2 r5 convRGB32_2 r6 convRGB32_2 r7 + + subs r12, r12, #1 bgt .loopRGB32 ldmfd sp!, {r4-r7,pc} @@ -94,28 +92,29 @@ bgr444_to_rgb32_sh: mov r12, #0x40>>3 @ repeats add r0, r0, #0x40*4 - mov lr, #0x00e00000 - orr lr, lr, #0x00e0 + mov lr, #0x00f00000 + orr lr, lr, #0x00f0 .loopRGB32sh: - subs r12, r12, #1 + ldmia r1!, {r4-r7} + convRGB32_2 r4, 2 + convRGB32_2 r5, 2 + convRGB32_2 r6, 2 + convRGB32_2 r7, 2 + subs r12, r12, #1 + bgt .loopRGB32sh + + mov r12, #0x40>>3 @ repeats + sub r1, r1, #0x40*2 + and lr, lr, lr, lsl #1 @ kill 
LSB for correct shadow colors + +.loopRGB32hi: ldmia r1!, {r4-r7} convRGB32_2 r4, 1 convRGB32_2 r5, 1 convRGB32_2 r6, 1 convRGB32_2 r7, 1 - bgt .loopRGB32sh - - mov r12, #0x40>>3 @ repeats - sub r1, r1, #0x40*2 - -.loopRGB32hi: - ldmia r1!, {r4-r7} - convRGB32_2 r4, 2 - convRGB32_2 r5, 2 - convRGB32_2 r6, 2 - convRGB32_2 r7, 2 subs r12, r12, #1 bgt .loopRGB32hi @@ -126,71 +125,138 @@ bgr444_to_rgb32_sh: @ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - -@ mode2 blitter -.global vidcpy_m2 @ void *dest, void *src, int m32col, int with_32c_border -vidcpy_m2: +.global vidcpy_8bit @ void *dest, void *src, int x_y, int w_h +vidcpy_8bit: stmfd sp!, {r4-r6,lr} - mov r12, #224 @ lines - add r0, r0, #320*8 + mov r12, r2, lsl #16 @ y + + mov r4, r12, lsr #16-8 @ 320*y = 256*y+64*y + add r4, r4, r12, lsr #16-6 + add r0, r0, r4 @ pd += 320*y + x + add r0, r0, r2, lsr #16 + + add r4, r4, r12, lsr #16-3 @ 328*y = 320*y + 8*y + add r1, r1, r4 @ ps += 328*y + x + 8 + add r1, r1, r2, lsr #16 add r1, r1, #8 - mov lr, #0 - tst r2, r2 - movne lr, #64 - tstne r3, r3 - addne r0, r0, #32 + mov lr, r3, lsr #16 @ w + mov r12, r3, lsl #16 @ h -vidCpyM2_loop_out: - mov r6, #10 - sub r6, r6, lr, lsr #5 @ -= 2 in 32col mode -vidCpyM2_loop: +vidCpy8bit_loop_out: + movs r6, lr, lsr #5 +@ beq vidCpy8bit_loop_end +vidCpy8bit_loop: subs r6, r6, #1 ldmia r1!, {r2-r5} stmia r0!, {r2-r5} ldmia r1!, {r2-r5} stmia r0!, {r2-r5} - bne vidCpyM2_loop + bne vidCpy8bit_loop - subs r12,r12,#1 - add r0, r0, lr - add r1, r1, #8 - add r1, r1, lr - bne vidCpyM2_loop_out + ands r6, lr, #0x0018 + beq vidCpy8bit_loop_end +vidCpy8bit_loop2: + ldmia r1!, {r2-r3} + subs r6, r6, #8 + stmia r0!, {r2-r3} + bne vidCpy8bit_loop2 + +vidCpy8bit_loop_end: + subs r12,r12,#1<<16 + add r0, r0, #320 + sub r0, r0, lr + add r1, r1, #328 + sub r1, r1, lr + bne vidCpy8bit_loop_out ldmfd sp!, {r4-r6,pc} -.global vidcpy_m2_rot @ void *dest, void *src, int m32col, int with_32c_border -vidcpy_m2_rot: - stmfd 
sp!,{r4-r8,lr} +.global vidcpy_8bit_rot @ void *dest, void *src, int x_y, int w_h +vidcpy_8bit_rot: + stmfd sp!, {r4-r10,lr} + + mov r12, r2, lsl #16 @ y + + add r0, r0, r12, lsr #16 @ pd += y + (319-x)*240 + mov r4, #320 + sub r4, r4, #1 + sub r4, r4, r2, lsr #16 @ (319-x) + add r0, r0, r4, lsl #8 + sub r0, r0, r4, lsl #4 + + mov r4, r12, lsr #16-8 @ 328*y = 256*y + 64*y + 8*y + add r4, r4, r12, lsr #16-6 + add r4, r4, r12, lsr #16-3 + add r1, r1, r4 @ ps += 328*y + x + 8 + add r1, r1, r2, lsr #16 add r1, r1, #8 - tst r2, r2 - subne r1, r1, #32 @ adjust - mov r4, r0 - mov r5, r1 - mov r6, r2 - mov r7, #8+4 + mov lr, r3, lsr #16 @ w + mov r12, r3, lsl #16 @ h -vidcpy_m2_rot_loop: - @ a bit lame but oh well.. - mov r0, r4 - mov r1, r5 - mov r2, r7 - mov r3, r6 mov r8, #328 - adr lr, after_rot_blit8 - stmfd sp!,{r4-r8,lr} - b rotated_blit8_2 +vidCpy8bitrot_loop_out: + mov r10, r0 + movs r9, lr, lsr #2 +@ beq vidCpy8bitrot_loop_end +vidCpy8bitrot_loop: + mov r6, r1 + ldr r2, [r6], r8 + ldr r3, [r6], r8 + ldr r4, [r6], r8 + ldr r5, [r6], r8 -after_rot_blit8: - add r5, r5, #328*4 - add r7, r7, #4 - cmp r7, #224+8+4 - ldmgefd sp!,{r4-r8,pc} - b vidcpy_m2_rot_loop + mov r6, r2, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r3, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r4, lsl #24 + mov r6, r6, lsr #8 + orr r6, r6, r5, lsl #24 + str r6, [r0], #-240 + + and r6, r3, #0xff00 + and r7, r2, #0xff00 + orr r6, r6, r7, lsr #8 + and r7, r4, #0xff00 + orr r6, r6, r7, lsl #8 + and r7, r5, #0xff00 + orr r6, r6, r7, lsl #16 + str r6, [r0], #-240 + + and r6, r4, #0xff0000 + and r7, r2, #0xff0000 + orr r6, r6, r7, lsr #16 + and r7, r3, #0xff0000 + orr r6, r6, r7, lsr #8 + and r7, r5, #0xff0000 + orr r6, r6, r7, lsl #8 + str r6, [r0], #-240 + + mov r6, r5, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r4, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r3, lsr #24 + mov r6, r6, lsl #8 + orr r6, r6, r2, lsr #24 + str r6, [r0], #-240 + + subs r9, r9, #1 + add r1, r1, #4 + bne vidCpy8bitrot_loop + 
+vidCpy8bitrot_loop_end: + subs r12,r12,#4<<16 + add r0, r10, #4 + sub r1, r1, lr + add r1, r1, #4*328 + bne vidCpy8bitrot_loop_out + + ldmfd sp!, {r4-r10,pc} .global rotated_blit8 @ void *dst, void *linesx4, u32 y, int is_32col diff --git a/platform/common/common.mak b/platform/common/common.mak index 3a836e15..68ca6ff6 100644 --- a/platform/common/common.mak +++ b/platform/common/common.mak @@ -8,8 +8,9 @@ asm_memory = 0 asm_render = 0 asm_ym2612 = 0 asm_misc = 0 -asm_cdpico = 0 asm_cdmemory = 0 +asm_32xdraw = 0 +asm_32xmemory = 0 asm_mix = 0 endif @@ -41,6 +42,10 @@ ifeq "$(pprof)" "1" DEFINES += PPROF SRCS_COMMON += $(R)platform/linux/pprof.c endif +ifeq "$(gperf)" "1" +DEFINES += GPERF +LDFLAGS += -lprofiler -lstdc++ +endif # ARM asm stuff ifeq "$(ARCH)" "arm" @@ -50,31 +55,31 @@ SRCS_COMMON += $(R)pico/draw_arm.S $(R)pico/draw2_arm.S endif ifeq "$(asm_memory)" "1" DEFINES += _ASM_MEMORY_C -SRCS_COMMON += $(R)pico/memory_arm.s +SRCS_COMMON += $(R)pico/memory_arm.S endif ifeq "$(asm_ym2612)" "1" DEFINES += _ASM_YM2612_C -SRCS_COMMON += $(R)pico/sound/ym2612_arm.s +SRCS_COMMON += $(R)pico/sound/ym2612_arm.S endif ifeq "$(asm_misc)" "1" DEFINES += _ASM_MISC_C SRCS_COMMON += $(R)pico/misc_arm.s SRCS_COMMON += $(R)pico/cd/misc_arm.s endif -ifeq "$(asm_cdpico)" "1" -DEFINES += _ASM_CD_PICO_C -SRCS_COMMON += $(R)pico/cd/mcd_arm.s -endif ifeq "$(asm_cdmemory)" "1" DEFINES += _ASM_CD_MEMORY_C -SRCS_COMMON += $(R)pico/cd/memory_arm.s +SRCS_COMMON += $(R)pico/cd/memory_arm.S endif ifeq "$(asm_32xdraw)" "1" DEFINES += _ASM_32X_DRAW -SRCS_COMMON += $(R)pico/32x/draw_arm.s +SRCS_COMMON += $(R)pico/32x/draw_arm.S +endif +ifeq "$(asm_32xmemory)" "1" +DEFINES += _ASM_32X_MEMORY_C +SRCS_COMMON += $(R)pico/32x/memory_arm.S endif ifeq "$(asm_mix)" "1" -SRCS_COMMON += $(R)pico/sound/mix_arm.s +SRCS_COMMON += $(R)pico/sound/mix_arm.S endif endif # ARCH=arm @@ -93,9 +98,9 @@ DEFINES += NO_SMS endif # CD SRCS_COMMON += $(R)pico/cd/mcd.c $(R)pico/cd/memory.c $(R)pico/cd/sek.c \ - 
$(R)pico/cd/LC89510.c $(R)pico/cd/cd_sys.c $(R)pico/cd/cd_file.c \ - $(R)pico/cd/cue.c $(R)pico/cd/gfx.c $(R)pico/cd/gfx_dma.c \ - $(R)pico/cd/misc.c $(R)pico/cd/pcm.c $(R)pico/cd/buffering.c + $(R)pico/cd/cdc.c $(R)pico/cd/cdd.c $(R)pico/cd/cd_image.c \ + $(R)pico/cd/cd_parse.c $(R)pico/cd/gfx.c $(R)pico/cd/gfx_dma.c \ + $(R)pico/cd/misc.c $(R)pico/cd/pcm.c # 32X ifneq "$(no_32x)" "1" SRCS_COMMON += $(R)pico/32x/32x.c $(R)pico/32x/memory.c $(R)pico/32x/draw.c \ @@ -107,6 +112,7 @@ endif SRCS_COMMON += $(R)pico/pico/pico.c $(R)pico/pico/memory.c $(R)pico/pico/xpcm.c # carthw SRCS_COMMON += $(R)pico/carthw/carthw.c +SRCS_COMMON += $(R)pico/carthw/eeprom_spi.c # SVP SRCS_COMMON += $(R)pico/carthw/svp/svp.c $(R)pico/carthw/svp/memory.c \ $(R)pico/carthw/svp/ssp16.c @@ -116,8 +122,9 @@ SRCS_COMMON += $(R)pico/carthw/svp/stub_arm.S SRCS_COMMON += $(R)pico/carthw/svp/compiler.c endif # sound -SRCS_COMMON += $(R)pico/sound/sound.c +SRCS_COMMON += $(R)pico/sound/sound.c $(R)pico/sound/resampler.c SRCS_COMMON += $(R)pico/sound/sn76496.c $(R)pico/sound/ym2612.c +SRCS_COMMON += $(R)pico/sound/emu2413/emu2413.c ifneq "$(ARCH)$(asm_mix)" "arm1" SRCS_COMMON += $(R)pico/sound/mix.c endif @@ -142,7 +149,7 @@ endif # --- Z80 --- ifeq "$(use_drz80)" "1" DEFINES += _USE_DRZ80 -SRCS_COMMON += $(R)cpu/DrZ80/drz80.s +SRCS_COMMON += $(R)cpu/DrZ80/drz80.S endif # ifeq "$(use_cz80)" "1" @@ -161,8 +168,16 @@ SRCS_COMMON += $(R)cpu/sh2/compiler.c ifdef drc_debug DEFINES += DRC_DEBUG=$(drc_debug) SRCS_COMMON += $(R)cpu/sh2/mame/sh2dasm.c -SRCS_COMMON += $(R)platform/libpicofe/linux/host_dasm.c -LDFLAGS += -lbfd -lopcodes -liberty +DASM = $(R)platform/libpicofe/linux/host_dasm.c +DASMLIBS = -lbfd -lopcodes -liberty +ifeq ("$(ARCH)",$(filter "$(ARCH)","arm" "mipsel")) +ifeq ($(filter_out $(shell $(CC) --print-file-name=libbfd.so),"/"),) +DASM = $(R)platform/common/host_dasm.c +DASMLIBS = +endif +endif +SRCS_COMMON += $(DASM) +LDFLAGS += $(DASMLIBS) endif endif # use_sh2drc SRCS_COMMON += 
$(R)cpu/sh2/mame/sh2pico.c @@ -177,18 +192,22 @@ ifeq "$(use_cyclone)" "1" $(FR)pico/pico.c: $(FR)cpu/cyclone/Cyclone.h endif +CYCLONE_CONFIG ?= cyclone_config.h + $(FR)cpu/cyclone/Cyclone.h: @echo "Cyclone submodule is missing, please run 'git submodule update --init'" @false -$(FR)cpu/cyclone/Cyclone.s: $(FR)cpu/cyclone_config.h +$(FR)cpu/cyclone/Cyclone.s: $(FR)cpu/$(CYCLONE_CONFIG) @echo building Cyclone... - @make -C $(R)cpu/cyclone/ CONFIG_FILE=../cyclone_config.h + @export CC=$(CYCLONE_CC) CXX=$(CYCLONE_CXX) CFLAGS=-O2 CXXFLAGS=-O2 CPPFLAGS="" LDFLAGS="" && \ + make -C $(R)cpu/cyclone/ CONFIG_FILE=../$(CYCLONE_CONFIG) HAVE_ARMv6=$(HAVE_ARMv6) $(FR)cpu/cyclone/Cyclone.s: $(FR)cpu/cyclone/*.cpp $(FR)cpu/cyclone/*.h -$(FR)cpu/musashi/m68kops.c: +$(FR)cpu/musashi/m68kops.c: $(FR)cpu/musashi/m68k_in.c @make -C $(R)cpu/musashi +$(FR)cpu/musashi/m68kcpu.c: $(FR)cpu/musashi/m68kops.c deps_set = yes endif # deps_set diff --git a/platform/common/config_file.c b/platform/common/config_file.c index 324c993e..d5f583d9 100644 --- a/platform/common/config_file.c +++ b/platform/common/config_file.c @@ -24,7 +24,7 @@ static char *mystrip(char *str); #include "menu_pico.h" #include "emu.h" -#include +#include // always output DOS endlines #ifdef _WIN32 @@ -35,7 +35,7 @@ static char *mystrip(char *str); static int seek_sect(FILE *f, const char *section) { - char line[128], *tmp; + char line[640], *tmp; int len; len = strlen(section); @@ -81,6 +81,19 @@ static void keys_write(FILE *fn, int dev_id, const int *binds) } } + for (i = 0; me_ctrl_actions[i].name != NULL; i++) { + mask = me_ctrl_actions[i].mask; + if (mask & binds[IN_BIND_OFFS(k, IN_BINDTYPE_PLAYER34)]) { + strncpy(act, me_ctrl_actions[i].name, 31); + fprintf(fn, "bind %s = player3 %s" NL, name, mystrip(act)); + } + mask = me_ctrl_actions[i].mask << 16; + if (mask & binds[IN_BIND_OFFS(k, IN_BINDTYPE_PLAYER34)]) { + strncpy(act, me_ctrl_actions[i].name, 31); + fprintf(fn, "bind %s = player4 %s" NL, name, mystrip(act)); 
+ } + } + for (i = 0; emuctrl_actions[i].name != NULL; i++) { mask = emuctrl_actions[i].mask; if (mask & binds[IN_BIND_OFFS(k, IN_BINDTYPE_EMU)]) { @@ -96,7 +109,7 @@ int config_write(const char *fname) FILE *fn = NULL; menu_entry *me; int t; - char line[128]; + char line[640]; fn = fopen(fname, "w"); if (fn == NULL) @@ -105,7 +118,7 @@ int config_write(const char *fname) for (me = me_list_get_first(); me != NULL; me = me_list_get_next()) { int dummy; - if (!me->need_to_save || !me->enabled) + if (!me->need_to_save) continue; if (me->name == NULL || me->name[0] == 0) continue; @@ -116,21 +129,26 @@ int config_write(const char *fname) else if (me->beh == MB_OPT_RANGE || me->beh == MB_OPT_CUSTRANGE) { fprintf(fn, "%s = %i" NL, me->name, *(int *)me->var); } - else if (me->beh == MB_OPT_ENUM && me->data != NULL) { + else if (me->beh == MB_OPT_ENUM) { const char **names = (const char **)me->data; + if (names == NULL) + continue; for (t = 0; names[t] != NULL; t++) { if (*(int *)me->var == t) { - strncpy(line, names[t], sizeof(line)); + strncpy(line, names[t], sizeof(line)-1); + line[sizeof(line)-1] = '\0'; goto write_line; } } } else if (me->generate_name != NULL) { - strncpy(line, me->generate_name(0, &dummy), sizeof(line)); + strncpy(line, me->generate_name(me->id, &dummy), sizeof(line)-1); + line[sizeof(line)-1] = '\0'; goto write_line; } else - lprintf("config: unhandled write: %i\n", me->id); + lprintf("config: unhandled write: '%s' id %d behavior %d\n", + me->name, me->id, me->beh); continue; write_line: @@ -165,7 +183,7 @@ write_line: int config_writelrom(const char *fname) { - char line[128], *tmp, *optr = NULL; + char line[640], *tmp, *optr = NULL; char *old_data = NULL; int size; FILE *f; @@ -212,7 +230,7 @@ int config_writelrom(const char *fname) int config_readlrom(const char *fname) { - char line[128], *tmp; + char line[640], *tmp; int i, len, ret = -1; FILE *f; @@ -234,9 +252,9 @@ int config_readlrom(const char *fname) tmp++; mystrip(tmp); - len = 
sizeof(rom_fname_loaded); + len = sizeof(rom_fname_loaded)-1; strncpy(rom_fname_loaded, tmp, len); - rom_fname_loaded[len-1] = 0; + rom_fname_loaded[len] = 0; ret = 0; break; } @@ -259,60 +277,65 @@ static int custom_read(menu_entry *me, const char *var, const char *val) case MA_OPT_SOUND_QUALITY: if (strcasecmp(var, "Sound Quality") != 0) return 0; - PsndRate = strtoul(val, &tmp, 10); - if (PsndRate < 8000 || PsndRate > 44100) - PsndRate = 22050; + PicoIn.sndRate = strtoul(val, &tmp, 10); + if (PicoIn.sndRate < 8000 || PicoIn.sndRate > 54000) { + if (strncasecmp(tmp, "native", 6) == 0) { + tmp += 6; + PicoIn.sndRate = 53000; + } else + PicoIn.sndRate = 22050; + } if (*tmp == 'H' || *tmp == 'h') tmp++; if (*tmp == 'Z' || *tmp == 'z') tmp++; while (*tmp == ' ') tmp++; if (strcasecmp(tmp, "stereo") == 0) { - PicoOpt |= POPT_EN_STEREO; + PicoIn.opt |= POPT_EN_STEREO; } else if (strcasecmp(tmp, "mono") == 0) { - PicoOpt &= ~POPT_EN_STEREO; + PicoIn.opt &= ~POPT_EN_STEREO; } else return 0; return 1; + case MA_OPT_SOUND_ALPHA: + if (strcasecmp(var, "Filter strength") != 0) return 0; + PicoIn.sndFilterAlpha = 0x10000 * atof(val); + return 1; + case MA_OPT_REGION: if (strcasecmp(var, "Region") != 0) return 0; if (strncasecmp(val, "Auto: ", 6) == 0) { const char *p = val + 5, *end = val + strlen(val); int i; - PicoRegionOverride = PicoAutoRgnOrder = 0; + PicoIn.regionOverride = PicoIn.autoRgnOrder = 0; for (i = 0; p < end && i < 3; i++) { while (*p == ' ') p++; if (p[0] == 'J' && p[1] == 'P') { - PicoAutoRgnOrder |= 1 << (i*4); + PicoIn.autoRgnOrder |= 1 << (i*4); } else if (p[0] == 'U' && p[1] == 'S') { - PicoAutoRgnOrder |= 4 << (i*4); + PicoIn.autoRgnOrder |= 4 << (i*4); } else if (p[0] == 'E' && p[1] == 'U') { - PicoAutoRgnOrder |= 8 << (i*4); + PicoIn.autoRgnOrder |= 8 << (i*4); } while (*p != ' ' && *p != 0) p++; if (*p == 0) break; } } else if (strcasecmp(val, "Auto") == 0) { - PicoRegionOverride = 0; + PicoIn.regionOverride = 0; } else if (strcasecmp(val, "Japan 
NTSC") == 0) { - PicoRegionOverride = 1; + PicoIn.regionOverride = 1; } else if (strcasecmp(val, "Japan PAL") == 0) { - PicoRegionOverride = 2; + PicoIn.regionOverride = 2; } else if (strcasecmp(val, "USA") == 0) { - PicoRegionOverride = 4; + PicoIn.regionOverride = 4; } else if (strcasecmp(val, "Europe") == 0) { - PicoRegionOverride = 8; + PicoIn.regionOverride = 8; } else return 0; return 1; - case MA_CDOPT_READAHEAD: - if (strcasecmp(var, "ReadAhead buffer") != 0) return 0; - PicoCDBuffers = atoi(val) / 2; - return 1; - case MA_32XOPT_MSH2_CYCLES: currentConfig.msh2_khz = atoi(val); Pico32xSetClocks(currentConfig.msh2_khz * 1000, 0); @@ -327,29 +350,21 @@ static int custom_read(menu_entry *me, const char *var, const char *val) currentConfig.gamma = atoi(val); return 1; + case MA_OPT2_MAX_FRAMESKIP: + currentConfig.max_skip = atoi(val); + return 1; + /* PSP */ - case MA_OPT3_SCALE: - if (strcasecmp(var, "Scale factor") != 0) return 0; - currentConfig.scale = atof(val); - return 1; - case MA_OPT3_HSCALE32: - if (strcasecmp(var, "Hor. scale (for low res. games)") != 0) return 0; - currentConfig.hscale32 = atof(val); - return 1; - case MA_OPT3_HSCALE40: - if (strcasecmp(var, "Hor. scale (for hi res. 
games)") != 0) return 0; - currentConfig.hscale40 = atof(val); - return 1; case MA_OPT3_VSYNC: // XXX: use enum if (strcasecmp(var, "Wait for vsync") != 0) return 0; if (strcasecmp(val, "never") == 0) { - currentConfig.EmuOpt &= ~0x12000; + currentConfig.EmuOpt &= ~(EOPT_VSYNC|EOPT_VSYNC_MODE); } else if (strcasecmp(val, "sometimes") == 0) { - currentConfig.EmuOpt |= 0x12000; + currentConfig.EmuOpt |= (EOPT_VSYNC|EOPT_VSYNC_MODE); } else if (strcasecmp(val, "always") == 0) { - currentConfig.EmuOpt &= ~0x12000; - currentConfig.EmuOpt |= 0x02000; + currentConfig.EmuOpt &= ~EOPT_VSYNC_MODE; + currentConfig.EmuOpt |= EOPT_VSYNC; } else return 0; return 1; @@ -373,12 +388,12 @@ static int parse_bind_val(const char *val, int *type) int player, shift = 0; player = atoi(val + 6) - 1; - if (player > 1) + if (player > 3) return -1; - if (player == 1) + if (player & 1) shift = 16; - *type = IN_BINDTYPE_PLAYER12; + *type = IN_BINDTYPE_PLAYER12 + (player >> 1); for (i = 0; me_ctrl_actions[i].name != NULL; i++) { if (strncasecmp(me_ctrl_actions[i].name, val + 8, strlen(val + 8)) == 0) return me_ctrl_actions[i].mask << shift; @@ -396,7 +411,7 @@ static int parse_bind_val(const char *val, int *type) static void keys_parse_all(FILE *f) { - char line[256], *var, *val; + char line[640], *var, *val; int dev_id = -1; int acts, type; int ret; @@ -422,12 +437,13 @@ static void keys_parse_all(FILE *f) acts = parse_bind_val(val, &type); if (acts == -1) { lprintf("config: unhandled action \"%s\"\n", val); - return; + continue; } mystrip(var + 5); in_config_bind_key(dev_id, var + 5, acts, type); } + in_clean_binds(); } static void parse(const char *var, const char *val, int *keys_encountered) @@ -504,7 +520,7 @@ bad_val: int config_readsect(const char *fname, const char *section) { - char line[128], *var, *val; + char line[640], *var, *val; int keys_encountered = 0; FILE *f; int ret; diff --git a/platform/common/disarm.c b/platform/common/disarm.c new file mode 100644 index 
00000000..90d0b80d --- /dev/null +++ b/platform/common/disarm.c @@ -0,0 +1,494 @@ +/* + * Copyright (c) 2012 Wojtek Kaniewski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include + +#define IMM_FORMAT "0x%x" +//#define IMM_FORMAT "%d" +#define ADDR_FORMAT "0x%x" + +static inline unsigned int rol(unsigned int value, unsigned int shift) +{ + shift &= 31; + + return (value >> shift) | (value << (32 - shift)); +} + +static inline const char *condition(unsigned int insn) +{ + const char *conditions[16] = { "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", "hi", "ls", "ge", "lt", "gt", "le", "", "nv" }; + return conditions[(insn >> 28) & 0x0f]; +} + +static inline const char *register_name(unsigned int reg) +{ + const char *register_names[16] = { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "sp", "lr", "pc" }; + return register_names[reg & 0x0f]; +} + +static const char *register_list(unsigned int list, char *buf, size_t buf_len) +{ + int i; + + buf[0] = 0; + + for (i = 0; i < 16; i++) + { + if ((list >> i) & 1) + { + snprintf(buf + strlen(buf), buf_len - strlen(buf), "%s%s", (buf[0] == 0) ? "" : ",", register_name(i)); + } + } + + return buf; +} + +static const char *shift(unsigned int insn, char *buf, size_t buf_len) +{ + unsigned int imm = (insn >> 7) & 0x1f; + const char *rn = register_name(insn >> 8); + unsigned int type = (insn >> 4) & 0x07; + + switch (type) + { + case 0: + snprintf(buf, buf_len, (imm != 0) ? ",lsl #%d" : "", imm); + break; + case 1: + snprintf(buf, buf_len, ",lsl %s", rn); + break; + case 2: + snprintf(buf, buf_len, ",lsr #%d", imm ? imm : 32); + break; + case 3: + snprintf(buf, buf_len, ",lsr %s", rn); + break; + case 4: + snprintf(buf, buf_len, ",asr #%d", imm ? imm : 32); + break; + case 5: + snprintf(buf, buf_len, ",asr %s", rn); + break; + case 6: + snprintf(buf, buf_len, (imm != 0) ? 
",ror #%d" : ",rrx", imm); + break; + case 7: + snprintf(buf, buf_len, ",ror %s", rn); + break; + } + + return buf; +} + +static const char *immediate(unsigned int imm, int negative, int show_if_zero, char *buf, size_t buf_len) +{ + if (imm || show_if_zero) + { + snprintf(buf, buf_len, ",#%s" IMM_FORMAT, (negative) ? "-" : "", imm); + return buf; + } + + return ""; +} + +static int data_processing(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + unsigned int oper = (insn >> 21) & 15; + const char *names[16] = { "and", "eor", "sub", "rsb", "add", "adc", "sbc", "rsc", "tst", "teq", "cmp", "cmn", "orr", "mov", "bic", "mvn" }; + const char *name; + const char *s; + unsigned int rd; + unsigned int rn; + int is_move = ((oper == 13) || (oper == 15)); + int is_test = ((oper >= 8) && (oper <= 11)); + char tmp_buf[64]; + + name = names[oper]; + s = ((insn >> 20) & 1) ? "s" : ""; + rn = (insn >> 16) & 15; + rd = (insn >> 12) & 15; + + /* mov r0,r0,r0 is a nop */ + if (insn == 0xe1a00000) + { + snprintf(buf, buf_len, "nop"); + return 1; + } + + /* mrs */ + if ((insn & 0x0fbf0fff) == 0x010f0000) + { + const char *psr = ((insn >> 22) & 1) ? "spsr" : "cpsr"; + const char *rd = register_name(insn >> 12); + + snprintf(buf, buf_len, "mrs%s %s,%s", condition(insn), rd, psr); + + return 1; + } + + /* msr flag only*/ + if ((insn & 0x0db0f000) == 0x0120f000) + { + const char *psr = ((insn >> 22) & 1) ? 
"spsr" : "cpsr"; + const char *suffix; + + switch ((insn >> 16) & 15) + { + case 9: + suffix = ""; + break; + case 8: + suffix = "_f"; + break; + case 1: + suffix = "_c"; + break; + default: + return 0; + } + + if ((insn >> 25) & 1) + { + unsigned int imm = rol(insn & 0x000000ff, ((insn >> 8) & 15) * 2); + + snprintf(buf, buf_len, "msr%s %s%s,#" IMM_FORMAT, condition(insn), psr, suffix, imm); + } + else + { + const char *rm = register_name(insn >> 0); + + if (((insn >> 4) & 255) != 0) + { + return 0; + } + + snprintf(buf, buf_len, "msr%s %s%s,%s", condition(insn), psr, suffix, rm); + } + + return 1; + } + + if (((insn >> 25) & 1) == 0) + { + unsigned int rm; + + rm = (insn & 15); + + if (is_move) + { + snprintf(buf, buf_len, "%s%s%s %s,%s%s", name, condition(insn), s, register_name(rd), register_name(rm), shift(insn, tmp_buf, sizeof(tmp_buf))); + } + else if (is_test) + { + snprintf(buf, buf_len, "%s%s %s,%s%s", name, condition(insn), register_name(rn), register_name(rm), shift(insn, tmp_buf, sizeof(tmp_buf))); + } + else + { + snprintf(buf, buf_len, "%s%s%s %s,%s,%s%s", name, condition(insn), s, register_name(rd), register_name(rn), register_name(rm), shift(insn, tmp_buf, sizeof(tmp_buf))); + } + } + else if ((insn & 0x0fb00000) == 0x03000000) + { + unsigned int imm; + char *half = (insn & 0x00400000) ? 
"t" : "w"; + + imm = (insn & 0x00000fff) | ((insn & 0x000f0000) >> 4); + + snprintf(buf, buf_len, "mov%s%s %s%s", half, condition(insn), register_name(rd), immediate(imm, 0, 1, tmp_buf, sizeof(tmp_buf))); + } + else + { + unsigned int imm; + + imm = rol(insn & 0x000000ff, ((insn >> 8) & 15) * 2); + + if (is_move) + { + snprintf(buf, buf_len, "%s%s%s %s%s", name, condition(insn), s, register_name(rd), immediate(imm, 0, 1, tmp_buf, sizeof(tmp_buf))); + } + else if (is_test) + { + snprintf(buf, buf_len, "%s%s %s%s", name, condition(insn), register_name(rn), immediate(imm, 0, 1, tmp_buf, sizeof(tmp_buf))); + } + else + { + snprintf(buf, buf_len, "%s%s%s %s,%s%s", name, condition(insn), s, register_name(rd), register_name(rn), immediate(imm, 0, 1, tmp_buf, sizeof(tmp_buf))); + } + } + + return 1; +} + +static int branch(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *link = ((insn >> 24) & 1) ? "l" : ""; + unsigned int address; + unsigned int offset; + + offset = insn & 0x00ffffff; + + if ((offset & 0x00800000) != 0) + { + offset |= 0xff000000; + } + + address = pc + 8 + (offset << 2); + + snprintf(buf, buf_len, "b%s%s " ADDR_FORMAT, link, condition(insn), address); + + return 1; +} + +static int multiply(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rd = register_name(insn >> 16); + const char *rn = register_name(insn >> 12); + const char *rs = register_name(insn >> 8); + const char *rm = register_name(insn >> 0); + const char *s = ((insn >> 20) & 1) ? "s" : ""; + int mla = (insn >> 21) & 1; + + snprintf(buf, buf_len, (mla) ? 
"mla%s%s %s,%s,%s,%s" : "mul%s%s %s,%s,%s", condition(insn), s, rd, rm, rs, rn); + + return 1; +} + +static int multiply_long(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rh = register_name(insn >> 16); + const char *rl = register_name(insn >> 12); + const char *rs = register_name(insn >> 8); + const char *rm = register_name(insn >> 0); + const char *u = ((insn >> 22) & 1) ? "s" : "u"; + const char *s = ((insn >> 20) & 1) ? "s" : ""; + const char *name = ((insn >> 21) & 1) ? "mlal" : "mull"; + + snprintf(buf, buf_len, "%s%s%s%s %s,%s,%s,%s", u, name, condition(insn), s, rl, rh, rm, rs); + + return 1; +} + +static int single_data_swap(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rn = register_name(insn >> 16); + const char *rd = register_name(insn >> 12); + const char *rm = register_name(insn >> 0); + const char *b = ((insn >> 22) & 1) ? "b" : ""; + + snprintf(buf, buf_len, "swp%s%s %s,%s,[%s]", condition(insn), b, rd, rm, rn); + + return 1; +} + +static int branch_and_exchange(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rn = register_name(insn >> 0); + const char *l = ((insn >> 5) & 1) ? "l" : ""; + + snprintf(buf, buf_len, "b%sx%s %s", l, condition(insn), rn); + + return 1; +} + +static int halfword_data_transfer(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rn = register_name(insn >> 16); + const char *rd = register_name(insn >> 12); + const char *name = ((insn >> 20) & 1) ? "ldr" : "str"; + const char *w = ((insn >> 21) & 1) ? "!" 
: ""; + int sign = (insn >> 23) & 1; + int pre = (insn >> 24) & 1; + const char *suffix = ""; + char tmp_buf[64]; + + switch ((insn >> 5) & 3) + { + case 0: + name = "swp"; + break; + case 1: + suffix = "h"; + break; + case 2: + suffix = "sb"; + break; + case 3: + suffix = "sh"; + break; + } + + if ((insn >> 22) & 1) + { + unsigned int imm = ((insn >> 4) & 0xf0) | (insn & 0x0f); + /* immediate() already emits its own leading ",#" (or nothing for a zero offset), so the post-indexed format must not add another comma */ + snprintf(buf, buf_len, (pre) ? "%s%s%s %s,[%s%s]%s" : "%s%s%s %s,[%s]%s%s", name, condition(insn), suffix, rd, rn, immediate(imm, !sign, 0, tmp_buf, sizeof(tmp_buf)), w); + } + else + { + const char *rm = register_name(insn >> 0); + + snprintf(buf, buf_len, (pre) ? "%s%s%s %s,[%s,%s%s]%s" : "%s%s%s %s,[%s],%s%s%s", name, condition(insn), suffix, rd, rn, sign ? "" : "-", rm, w); + } + + return 1; +} + +static int single_data_transfer(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *rn = register_name(insn >> 16); + const char *rd = register_name(insn >> 12); + const char *name = ((insn >> 20) & 1) ? "ldr" : "str"; + const char *w = ((insn >> 21) & 1) ? "!" : ""; + const char *b = ((insn >> 22) & 1) ? "b" : ""; + int sign = (insn >> 23) & 1; + int pre = (insn >> 24) & 1; + char tmp_buf[64]; + + if ((insn >> 25) & 1) + { + const char *rm = register_name(insn >> 0); + + snprintf(buf, buf_len, (pre) ? "%s%s%s %s,[%s,%s%s%s]%s" : "%s%s%s %s,[%s],%s%s%s%s", name, condition(insn), b, rd, rn, sign ? "" : "-", rm, shift(insn, tmp_buf, sizeof(tmp_buf)), w); + } + else + { + unsigned int imm = insn & 0x00000fff; + + snprintf(buf, buf_len, (pre) ? "%s%s%s %s,[%s%s]%s" : "%s%s%s %s,[%s]%s%s", name, condition(insn), b, rd, rn, immediate(imm, !sign, 0, tmp_buf, sizeof(tmp_buf)), w); + } + + return 1; +} + +static int block_data_transfer(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *s = ((insn >> 22) & 1) ? "^" : ""; + const char *w = ((insn >> 21) & 1) ? "!" : ""; + int load = (insn >> 20) & 1; + const char *name = (load) ? 
"ldm" : "stm"; + const char *ldm_stubs[4] = { "fa", "fd", "ea", "ed" }; + const char *stm_stubs[4] = { "ed", "ea", "fd", "fa" }; + int stub_idx = (insn >> 23) & 3; + const char *stub = (load) ? ldm_stubs[stub_idx] : stm_stubs[stub_idx]; + char tmp_buf[64]; + + snprintf(buf, buf_len, "%s%s%s %s%s, {%s}%s", name, condition(insn), stub, register_name(insn >> 16), w, register_list(insn & 0xffff, tmp_buf, sizeof(tmp_buf)), s); + + return 1; +} + +static int coprocessor_data_transfer(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *name = ((insn >> 20) & 1) ? "ldc" : "stc"; + const char *rn = register_name(insn >> 16); + int sign = (insn >> 23) & 1; + const char *l = ((insn >> 22) & 1) ? "l" : ""; + const char *w = ((insn >> 21) & 1) ? "!" : ""; + int pre = (insn >> 24) & 1; + unsigned int cp = (insn >> 8) & 15; + unsigned int cd = (insn >> 12) & 15; + unsigned int imm = (insn >> 0) & 255; + char tmp_buf[64]; + + snprintf(buf, buf_len, (pre) ? "%s%s%s p%d,cr%d,[%s%s]%s" : "%s%s%s p%d,cr%d,[%s]%s%s", name, condition(insn), l, cp, cd, rn, immediate(imm, !sign, 0, tmp_buf, sizeof(tmp_buf)), w); + + return 1; +} + +static int coprocessor_data_operation(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + snprintf(buf, buf_len, "cdp%s p%d,%d,cr%d,cr%d,cr%d,{%d}", condition(insn), (insn >> 8) & 15, (insn >> 20) & 15, (insn >> 12) & 15, (insn >> 16) & 15, (insn >> 0) & 15, (insn >> 5) & 7); + + return 1; +} + +static int coprocessor_register_transfer(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + const char *name = ((insn >> 20) & 1) ? 
"mrc" : "mcr"; + unsigned int cn = (insn >> 16) & 15; + const char *rd = register_name(insn >> 12); + unsigned int expr1 = (insn >> 21) & 7; + unsigned int expr2 = (insn >> 5) & 7; + unsigned int cp = (insn >> 8) & 15; + unsigned int cm = (insn >> 0) & 15; + + snprintf(buf, buf_len, "%s%s p%d,%d,%s,cr%d,cr%d,{%d}", name, condition(insn), cp, expr1, rd, cn, cm, expr2); + + return 1; +} + +static int software_interrupt(unsigned int pc, unsigned int insn, char *buf, size_t buf_len) +{ + snprintf(buf, buf_len, "swi%s %u", condition(insn), insn & 0x00ffffff); + + return 1; +} + +/* Disassemble one ARM instruction word 'insn' located at 'pc' into 'buf' (at most 'buf_len' bytes). Returns 1 if the word was decoded, 0 otherwise. '*addr' is set to the target address for B/BL and to 0 for everything else. Patterns are tested from most to least specific, so the order of the checks matters. */ +int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, unsigned long *addr) +{ + *addr = 0; + + if ((insn & 0x0fffffd0) == 0x012fff10) + return branch_and_exchange(pc, insn, buf, buf_len); + + if ((insn & 0x0fb00ff0) == 0x01000090) + return single_data_swap(pc, insn, buf, buf_len); + + if ((insn & 0x0fc000f0) == 0x00000090) + return multiply(pc, insn, buf, buf_len); + + if ((insn & 0x0f8000f0) == 0x00800090) + return multiply_long(pc, insn, buf, buf_len); + + if ((insn & 0x0f000010) == 0x0e000000) + return coprocessor_data_operation(pc, insn, buf, buf_len); + + if ((insn & 0x0f000010) == 0x0e000010) + return coprocessor_register_transfer(pc, insn, buf, buf_len); + + if ((insn & 0x0f000000) == 0x0f000000) + return software_interrupt(pc, insn, buf, buf_len); + + if ((insn & 0x0e000090) == 0x00000090) + return halfword_data_transfer(pc, insn, buf, buf_len); + + if ((insn & 0x0e000000) == 0x08000000) + return block_data_transfer(pc, insn, buf, buf_len); + + if ((insn & 0x0e000000) == 0x0a000000) { + *addr = (unsigned long)pc+8 + ((((unsigned long)insn & 0x00ffffff) ^ 0x00800000) - 0x00800000) * 4; /* sign-extend the 24-bit word offset (as branch() does) so backward branches resolve correctly */ + return branch(pc, insn, buf, buf_len); + } + + if ((insn & 0x0e000000) == 0x0c000000) + return coprocessor_data_transfer(pc, insn, buf, buf_len); + + if ((insn & 0x0c000000) == 0x00000000) + return data_processing(pc, insn, buf, buf_len); + + if ((insn & 0x0c000000) == 0x04000000) + return single_data_transfer(pc, insn, buf, 
buf_len); + + return 0; +} + diff --git a/platform/common/disarm.h b/platform/common/disarm.h new file mode 100644 index 00000000..a07675fd --- /dev/null +++ b/platform/common/disarm.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2012 Wojtek Kaniewski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef DISARM_H +#define DISARM_H + +int disarm(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, unsigned long *sym); + +#endif /* DISARM_H */ diff --git a/platform/common/dismips.c b/platform/common/dismips.c new file mode 100644 index 00000000..280ae994 --- /dev/null +++ b/platform/common/dismips.c @@ -0,0 +1,428 @@ +/* + * very basic mips disassembler for MIPS32/MIPS64 Release 2, only for picodrive + * Copyright (C) 2019-2021 irixxxx + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. 
+ */ + +// unimplemented insns: SYNC, CACHE, PREF, +// MOV[FT], LWC*/LDC*, SWC*/SDC*, and all of COP* (fpu, mmu, irq, exc, ...) +// unimplemented variants of insns: EHB, SSNOP (both SLL zero), JALR.HB, JR.HB +// however, it's certainly good enough for anything picodrive DRC throws at it. + +#include +#include +#include +#include + +#include "dismips.h" + + +static char *const register_names[32] = { + "$zero", + "$at", + "$v0", + "$v1", + "$a0", + "$a1", + "$a2", + "$a3", + "$t0", + "$t1", + "$t2", + "$t3", + "$t4", + "$t5", + "$t6", + "$t7", + "$s0", + "$s1", + "$s2", + "$s3", + "$s4", + "$s5", + "$s6", + "$s7", + "$t8", + "$t9", + "$k0", + "$k1", + "$gp", + "$sp", + "$fp", + "$ra" +}; + + +enum insn_type { + REG_DST, REG_ST, REG_TD, // 3, 2, or 1 regs + REG_DS, REG_DT, REG_D, REG_S, + S_IMM_DT, // 2 regs with shift amount + F_IMM_TS, // 2 regs with bitfield spec + B_IMM_S, B_IMM_TS, // pc-relative branches with 1 or 2 regs + T_IMM_S, // trap insns with immediate + J_IMM, // region-relative jump + A_IMM_TS, // arithmetic immediate with 2 regs + L_IMM_T, L_IMM_TS, // logical immediate with 1 or 2 regs + M_IMM_TS, M_IMM_S, // memory indexed with 2 regs + SB_CODE, // code parameter (syscall, break, sdbbp) + SR_BIT = 0x80 // shift right with R-bit +}; + +struct insn { + unsigned char op; + enum insn_type type; + char *name; +}; + +// ATTN: these array MUST be sorted by op (decode relies on it) + +// instructions with opcode SPECIAL (R-type) +#define OP_SPECIAL 0x00 +static const struct insn special_insns[] = { + {0x00, S_IMM_DT, "sll"}, +// {0x01, , "movf\0movt"}, + {0x02, S_IMM_DT|SR_BIT, "srl\0rotr"}, + {0x03, S_IMM_DT, "sra"}, + {0x04, REG_DST, "sllv"}, + {0x06, REG_DST|SR_BIT, "srlv\0rotrv"}, + {0x07, REG_DST, "srav"}, + {0x08, REG_S, "jr"}, + {0x09, REG_DS, "jalr"}, + {0x0a, REG_DST, "movz"}, + {0x0b, REG_DST, "movn"}, + {0x0c, SB_CODE, "syscall"}, + {0x0d, SB_CODE, "break"}, +// {0x0f, , "sync"}, + {0x10, REG_D, "mfhi"}, + {0x11, REG_S, "mthi"}, + {0x12, REG_D, 
"mflo"}, + {0x13, REG_S, "mtlo"}, + {0x14, REG_DST, "dsllv"}, + {0x16, REG_DST|SR_BIT, "dsrlv\0drotrv"}, + {0x17, REG_DST, "dsrav"}, + {0x18, REG_ST, "mult"}, + {0x19, REG_ST, "multu"}, + {0x1A, REG_ST, "div"}, + {0x1B, REG_ST, "divu"}, + {0x1C, REG_ST, "dmult"}, + {0x1D, REG_ST, "dmultu"}, + {0x1E, REG_ST, "ddiv"}, + {0x1F, REG_ST, "ddivu"}, + {0x20, REG_DST, "add"}, + {0x21, REG_DST, "addu"}, + {0x22, REG_DST, "sub"}, + {0x23, REG_DST, "subu"}, + {0x24, REG_DST, "and"}, + {0x25, REG_DST, "or"}, + {0x26, REG_DST, "xor"}, + {0x27, REG_DST, "nor"}, + {0x2A, REG_DST, "slt"}, + {0x2B, REG_DST, "sltu"}, + {0x2C, REG_DST, "dadd"}, + {0x2D, REG_DST, "daddu"}, + {0x2E, REG_DST, "dsub"}, + {0x2F, REG_DST, "dsubu"}, + {0x30, REG_ST, "tge" }, + {0x31, REG_ST, "tgeu" }, + {0x32, REG_ST, "tlt" }, + {0x33, REG_ST, "tltu" }, + {0x34, REG_ST, "teq" }, + {0x36, REG_ST, "tne" }, + {0x38, S_IMM_DT, "dsll"}, + {0x3A, S_IMM_DT|SR_BIT, "dsrl\0drotr"}, // immediate form: R-bit variant is drotr (drotrv is 0x16) + {0x3B, S_IMM_DT, "dsra"}, + {0x3C, S_IMM_DT, "dsll32"}, + {0x3E, S_IMM_DT|SR_BIT, "dsrl32\0drotr32"}, + {0x3F, S_IMM_DT, "dsra32"}, +}; + +// instructions with opcode SPECIAL2 (R-type) +#define OP_SPECIAL2 0x1C +static const struct insn special2_insns[] = { + {0x00, REG_ST, "madd" }, + {0x01, REG_ST, "maddu" }, + {0x02, REG_DST, "mul" }, // unlike madd/msub, mul writes a GPR: mul rd,rs,rt + {0x04, REG_ST, "msub" }, + {0x05, REG_ST, "msubu" }, + {0x20, REG_DS, "clz" }, + {0x21, REG_DS, "clo" }, + {0x24, REG_DS, "dclz" }, + {0x25, REG_DS, "dclo" }, + {0x37, SB_CODE, "sdbbp" }, +}; + +// instructions with opcode SPECIAL3 (R-type) +#define OP_SPECIAL3 0x1F +static const struct insn special3_insns[] = { + {0x00, F_IMM_TS, "ext" }, + {0x01, F_IMM_TS, "dextm" }, + {0x02, F_IMM_TS, "dextu" }, + {0x03, F_IMM_TS, "dext" }, + {0x04, F_IMM_TS, "ins" }, + {0x05, F_IMM_TS, "dinsm" }, + {0x06, F_IMM_TS, "dinsu" }, + {0x07, F_IMM_TS, "dins" }, + {0x3b, REG_TD, "rdhwr" }, +}; + +// instruction with opcode SPECIAL3 and function *BSHFL +#define FN_BSHFL 0x20 +static const struct insn bshfl_insns[] = { + 
{0x02, REG_DT, "wsbh" }, + {0x10, REG_DT, "seb" }, + {0x18, REG_DT, "seh" }, +}; +#define FN_DBSHFL 0x24 +static const struct insn dbshfl_insns[] = { + {0x02, REG_DT, "dsbh" }, + {0x05, REG_DT, "dshd" }, +}; + +// instructions with opcode REGIMM (I-type) +#define OP_REGIMM 0x01 +static const struct insn regimm_insns[] = { + {0x00, B_IMM_S, "bltz"}, + {0x01, B_IMM_S, "bgez"}, + {0x02, B_IMM_S, "bltzl"}, + {0x03, B_IMM_S, "bgezl"}, + {0x08, T_IMM_S, "tgei"}, + {0x09, T_IMM_S, "tgeiu"}, + {0x0a, T_IMM_S, "tlti"}, + {0x0b, T_IMM_S, "tltiu"}, + {0x0c, T_IMM_S, "teqi"}, + {0x0e, T_IMM_S, "tnei"}, + {0x10, B_IMM_S, "bltzal"}, + {0x11, B_IMM_S, "bgezal"}, + {0x12, B_IMM_S, "bltzall"}, + {0x13, B_IMM_S, "bgezall"}, + {0x1f, M_IMM_S, "synci" }, +}; + +// instructions with other opcodes (I-type) +static const struct insn immediate_insns[] = { + {0x02, J_IMM, "j"}, + {0x03, J_IMM, "jal"}, + {0x04, B_IMM_TS, "beq"}, + {0x05, B_IMM_TS, "bne"}, + {0x06, B_IMM_S, "blez"}, + {0x07, B_IMM_S, "bgtz"}, + {0x08, A_IMM_TS, "addi"}, + {0x09, A_IMM_TS, "addiu"}, + {0x0A, A_IMM_TS, "slti"}, + {0x0B, A_IMM_TS, "sltiu"}, + {0x0C, L_IMM_TS, "andi"}, + {0x0D, L_IMM_TS, "ori"}, + {0x0E, L_IMM_TS, "xori"}, + {0x0F, L_IMM_T, "lui"}, + {0x14, B_IMM_TS, "beql"}, + {0x15, B_IMM_TS, "bnel"}, + {0x16, B_IMM_S, "blezl"}, + {0x17, B_IMM_S, "bgtzl"}, + {0x18, A_IMM_TS, "daddi"}, + {0x19, A_IMM_TS, "daddiu"}, + {0x1A, M_IMM_TS, "ldl"}, + {0x1B, M_IMM_TS, "ldr"}, + {0x20, M_IMM_TS, "lb"}, + {0x21, M_IMM_TS, "lh"}, + {0x22, M_IMM_TS, "lwl"}, + {0x23, M_IMM_TS, "lw"}, + {0x24, M_IMM_TS, "lbu"}, + {0x25, M_IMM_TS, "lhu"}, + {0x26, M_IMM_TS, "lwr"}, + {0x27, M_IMM_TS, "lwu"}, + {0x28, M_IMM_TS, "sb"}, + {0x29, M_IMM_TS, "sh"}, + {0x2A, M_IMM_TS, "swl"}, + {0x2B, M_IMM_TS, "sw"}, + {0x2C, M_IMM_TS, "sdl"}, + {0x2D, M_IMM_TS, "sdr"}, + {0x2E, M_IMM_TS, "swr"}, +// {0x2F, , "cache"}, + {0x30, M_IMM_TS, "ll"}, +// {0x31, , "lwc1"}, +// {0x32, , "lwc2"}, +// {0x33, , "pref"}, + {0x34, M_IMM_TS, "lld"}, +// {0x35, , 
"ldc1"}, +// {0x36, , "ldc2"}, + {0x37, M_IMM_TS, "ld"}, + {0x38, M_IMM_TS, "sc"}, +// {0x39, , "swc1"}, +// {0x3A, , "swc2"}, + {0x3C, M_IMM_TS, "scd"}, +// {0x3D, , "sdc1"}, +// {0x3E, , "sdc2"}, + {0x3F, M_IMM_TS, "sd"}, +}; + +#define ARRAY_SIZE(a) (sizeof(a)/sizeof(*a)) + +// find instruction description for insn +static const struct insn *decode_insn(uint32_t insn) +{ + uint32_t op = insn >> 26; + const struct insn *pi; + int l = 0, r = 0; + + if (op == OP_SPECIAL) { + op = insn & 0x3f; + pi = special_insns; + r = ARRAY_SIZE(special_insns)-1; + } else if (op == OP_SPECIAL2) { + op = insn & 0x3f; + pi = special2_insns; + r = ARRAY_SIZE(special2_insns)-1; + } else if (op == OP_SPECIAL3) { + op = insn & 0x3f; + if (op == FN_BSHFL) { + op = (insn >> 6) & 0x1f; + pi = bshfl_insns; + r = ARRAY_SIZE(bshfl_insns)-1; + } else if (op == FN_DBSHFL) { + op = (insn >> 6) & 0x1f; + pi = dbshfl_insns; + r = ARRAY_SIZE(dbshfl_insns)-1; + } else { + pi = special3_insns; + r = ARRAY_SIZE(special3_insns)-1; + } + } else if (op == OP_REGIMM) { + op = (insn>>16) & 0x1f; + pi = regimm_insns; + r = ARRAY_SIZE(regimm_insns)-1; + } else { + pi = immediate_insns; + r = ARRAY_SIZE(immediate_insns)-1; + } + + while (l <= r) { + int m = (l+r) / 2; + if (pi[m].op == op) + return pi+m; + else if (pi[m].op < op) + l = m+1; + else + r = m-1; + } + return NULL; +} + +// calculate target for pc-relative branches +static unsigned long b_target(unsigned long pc, uint32_t insn) +{ + return pc + 4 + (int16_t)insn * 4; +} + +// calculate target for region-relative branches +static unsigned long j_target(unsigned long pc, uint32_t insn) +{ + return (pc & ~0x0fffffffL) | ((insn & 0x03ffffff) << 2); +} + +// main disassembler function +int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buflen, unsigned long *sym) +{ + const struct insn *pi = decode_insn(insn); + char *rs = register_names[(insn >> 21) & 0x1f]; + char *rt = register_names[(insn >> 16) & 0x1f]; + char *rd = register_names[(insn 
>> 11) & 0x1f]; + int sa = (insn >> 6) & 0x1f, sb = (insn >> 11) & 0x1f; + int imm = (int16_t) insn; + + *sym = 0; + if (pi == NULL) { + snprintf(buf, buflen, "0x%08lx", (unsigned long)insn); + return 0; + } + + switch (pi->type & ~SR_BIT) { + case REG_DST: + if ((insn & 0x3f) == 0x25 /*OR*/ && (insn & 0x1f0000) == 0 /*zero*/) + snprintf(buf, buflen, "move %s, %s", rd, rs); + else if ((pi->type & SR_BIT) && (insn & (1<<6))) + snprintf(buf, buflen, "%s %s, %s, %s", pi->name+strlen(pi->name)+1, rd, rs, rt); + else + snprintf(buf, buflen, "%s %s, %s, %s", pi->name, rd, rs, rt); + break; + case REG_ST: + if ((insn & 0x38) == 0x30 /*T..*/) + snprintf(buf, buflen, "%s %s, %s (code %d)", pi->name, rs, rt, (int)(insn>>6) & 0x3ff); + else + snprintf(buf, buflen, "%s %s, %s", pi->name, rs, rt); + break; + case REG_TD: + snprintf(buf, buflen, "%s %s, %s", pi->name, rt, rd); + break; + case REG_DS: + snprintf(buf, buflen, "%s %s, %s", pi->name, rd, rs); + break; + case REG_DT: + snprintf(buf, buflen, "%s %s, %s", pi->name, rd, rt); + break; + case REG_D: + snprintf(buf, buflen, "%s %s", pi->name, rd); + break; + case REG_S: + snprintf(buf, buflen, "%s %s", pi->name, rs); + break; + case S_IMM_DT: + if (insn == 0x00000000) + snprintf(buf, buflen, "nop"); + else if ((pi->type & SR_BIT) && (insn & (1<<21))) + snprintf(buf, buflen, "%s %s, %s, %d", pi->name+strlen(pi->name)+1, rd, rt, sa); + else + snprintf(buf, buflen, "%s %s, %s, %d", pi->name, rd, rt, sa); + break; + //dext: pos,size-1 dextm: pos,size-33 dextu: pos-32,size-1 + //dins: pos,pos+size-1 dinsm: pos,pos+size-33 dinsu: pos-32,pos+size-33 + case F_IMM_TS: + if (insn & 0x01) sb+=32; // ...m + if (insn & 0x02) sa+=32; // ...u + if (insn & 0x04) sb-=sa; // ins + snprintf(buf, buflen, "%s %s, %s, %d, %d", pi->name, rt, rs, sa, sb+1); + break; + case B_IMM_S: + *sym = b_target(pc, insn); + snprintf(buf, buflen, "%s %s, 0x%lx", pi->name, rs, *sym); + break; + case B_IMM_TS: + *sym = b_target(pc, insn); + snprintf(buf, 
buflen, "%s %s, %s, 0x%lx", pi->name, rs, rt, *sym); + break; + case J_IMM: + *sym = j_target(pc, insn); + snprintf(buf, buflen, "%s 0x%lx", pi->name, *sym); + break; + case A_IMM_TS: + if (abs(imm) < 1000) + snprintf(buf, buflen, "%s %s, %s, %d", pi->name, rt, rs, imm); + else + snprintf(buf, buflen, "%s %s, %s, 0x%x", pi->name, rt, rs, imm); + break; + case L_IMM_T: + snprintf(buf, buflen, "%s %s, 0x%x", pi->name, rt, (uint16_t)imm); + break; + case L_IMM_TS: + if ((insn >> 26) == 0x34 /*ORI*/ && (insn & 0x03e00000) == 0 /*zero*/) + snprintf(buf, buflen, "li %s, 0x%x", rt, (uint16_t)imm); + else + snprintf(buf, buflen, "%s %s, %s, 0x%x", pi->name, rt, rs, (uint16_t)imm); + break; + case M_IMM_TS: + snprintf(buf, buflen, "%s %s, %d(%s)", pi->name, rt, imm, rs); + break; + case M_IMM_S: + snprintf(buf, buflen, "%s %d(%s)", pi->name, imm, rs); + break; + case T_IMM_S: + snprintf(buf, buflen, "%s %s, %d", pi->name, rs, imm); + break; + case SB_CODE: + snprintf(buf, buflen, "%s %ld", pi->name, (unsigned long)(insn>>6) & 0xfffff); + break; + } + return 1; +} + diff --git a/platform/common/dismips.h b/platform/common/dismips.h new file mode 100644 index 00000000..8d105925 --- /dev/null +++ b/platform/common/dismips.h @@ -0,0 +1,6 @@ +#ifndef DISMIPS_H +#define DISMIPS_H + +int dismips(uintptr_t pc, uint32_t insn, char *buf, size_t buf_len, unsigned long *sym); + +#endif /* DISMIPS_H */ diff --git a/platform/common/dr_libs b/platform/common/dr_libs new file mode 160000 index 00000000..dd762b86 --- /dev/null +++ b/platform/common/dr_libs @@ -0,0 +1 @@ +Subproject commit dd762b861ecadf5ddd5fb03e9ca1db6707b54fbb diff --git a/platform/common/emu.c b/platform/common/emu.c index c66c637c..0f837cb4 100644 --- a/platform/common/emu.c +++ b/platform/common/emu.c @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2007-2010 + * (C) irixxxx, 2019-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
@@ -19,6 +20,7 @@ #include "../libpicofe/fonts.h" #include "../libpicofe/sndout.h" #include "../libpicofe/lprintf.h" +#include "../libpicofe/readpng.h" #include "../libpicofe/plat.h" #include "emu.h" #include "input_pico.h" @@ -28,6 +30,10 @@ #include #include +#if defined(__GNUC__) && __GNUC__ >= 7 +#pragma GCC diagnostic ignored "-Wformat-truncation" +#endif + #ifndef _WIN32 #define PATH_SEP "/" #define PATH_SEP_C '/' @@ -42,6 +48,7 @@ void *g_screen_ptr; int g_screen_width = 320; int g_screen_height = 240; +int g_screen_ppitch = 320; // pitch in pixels const char *PicoConfigFile = "config2.cfg"; currentConfig_t currentConfig, defaultConfig; @@ -52,7 +59,7 @@ int pico_inp_mode; int flip_after_sync; int engineState = PGS_Menu; -static short __attribute__((aligned(4))) sndBuffer[2*44100/50]; +static short __attribute__((aligned(4))) sndBuffer[2*54000/50]; /* tmp buff to reduce stack usage for plats with small stack */ static char static_buff[512]; @@ -122,8 +129,8 @@ static void fname_ext(char *dst, int dstlen, const char *prefix, const char *ext strncpy(dst + prefix_len, p, dstlen - prefix_len - 1); dst[dstlen - 8] = 0; - if (dst[strlen(dst) - 4] == '.') - dst[strlen(dst) - 4] = 0; + if ((p = strrchr(dst, '.')) != NULL) + dst[p-dst] = 0; if (ext) strcat(dst, ext); } @@ -171,12 +178,27 @@ static const char *find_bios(int *region, const char *cd_fname) ret = emu_read_config(cd_fname, 0); if (!ret) emu_read_config(NULL, 0); - if (PicoRegionOverride) { - *region = PicoRegionOverride; + if (PicoIn.regionOverride) { + *region = PicoIn.regionOverride; lprintf("override region to %s\n", *region != 4 ? (*region == 8 ? "EU" : "JAP") : "USA"); } + // look for MSU.MD rom file. XXX another extension list? ugh... + static const char *md_exts[] = { "gen", "smd", "md", "32x" }; + char *ext = strrchr(cd_fname, '.'); + int extpos = ext ? 
ext-cd_fname : strlen(cd_fname); + strcpy(static_buff, cd_fname); + static_buff[extpos++] = '.'; + for (i = 0; i < ARRAY_SIZE(md_exts); i++) { + strcpy(static_buff+extpos, md_exts[i]); + if (access(static_buff, R_OK) == 0) { + printf("found MSU rom: %s\n",static_buff); + return static_buff; + } + } + + // locate BIOS file if (*region == 4) { // US files = biosfiles_us; count = sizeof(biosfiles_us) / sizeof(char *); @@ -201,6 +223,16 @@ static const char *find_bios(int *region, const char *cd_fname) strcat(static_buff, ".zip"); f = fopen(static_buff, "rb"); if (f) break; + + strcpy(static_buff, files[i]); + strcat(static_buff, ".bin"); + f = fopen(static_buff, "rb"); + if (f) break; + + static_buff[strlen(static_buff) - 4] = 0; + strcat(static_buff, ".zip"); + f = fopen(static_buff, "rb"); + if (f) break; } if (f) { @@ -266,16 +298,16 @@ static char *emu_make_rom_id(const char *fname) static char id_string[3+0xe*3+0x3*3+0x30*3+3]; int pos, swab = 1; - if (PicoAHW & PAHW_MCD) { + if (PicoIn.AHW & PAHW_MCD) { strcpy(id_string, "CD|"); swab = 0; } - else if (PicoAHW & PAHW_SMS) + else if (PicoIn.AHW & PAHW_SMS) strcpy(id_string, "MS|"); else strcpy(id_string, "MD|"); pos = 3; - if (!(PicoAHW & PAHW_SMS)) { + if (!(PicoIn.AHW & PAHW_SMS)) { pos += extract_text(id_string + pos, media_id_header + 0x80, 0x0e, swab); // serial id_string[pos] = '|'; pos++; pos += extract_text(id_string + pos, media_id_header + 0xf0, 0x03, swab); // region @@ -296,7 +328,7 @@ static char *emu_make_rom_id(const char *fname) // buffer must be at least 150 byte long void emu_get_game_name(char *str150) { - int ret, swab = (PicoAHW & PAHW_MCD) ? 0 : 1; + int ret, swab = (PicoIn.AHW & PAHW_MCD) ? 
0 : 1; char *s, *d; ret = extract_text(str150, media_id_header + 0x50, 0x30, swab); // overseas name @@ -315,25 +347,33 @@ static void system_announce(void) const char *sys_name, *tv_standard, *extra = ""; int fps; - if (PicoAHW & PAHW_SMS) { + if (PicoIn.AHW & PAHW_SMS) { sys_name = "Master System"; + if (PicoIn.AHW & PAHW_GG) + sys_name = "Game Gear"; + else if (PicoIn.AHW & PAHW_SG) + sys_name = "SG-1000"; + else if (PicoIn.AHW & PAHW_SC) + sys_name = "SC-3000"; + else if (Pico.m.hardware & PMS_HW_JAP) + sys_name = "Mark III"; #ifdef NO_SMS extra = " [no support]"; #endif - } else if (PicoAHW & PAHW_PICO) { + } else if (PicoIn.AHW & PAHW_PICO) { sys_name = "Pico"; - } else if ((PicoAHW & (PAHW_32X|PAHW_MCD)) == (PAHW_32X|PAHW_MCD)) { + } else if ((PicoIn.AHW & (PAHW_32X|PAHW_MCD)) == (PAHW_32X|PAHW_MCD)) { sys_name = "32X + Mega CD"; if ((Pico.m.hardware & 0xc0) == 0x80) sys_name = "32X + Sega CD"; - } else if (PicoAHW & PAHW_MCD) { + } else if (PicoIn.AHW & PAHW_MCD) { sys_name = "Mega CD"; if ((Pico.m.hardware & 0xc0) == 0x80) sys_name = "Sega CD"; - } else if (PicoAHW & PAHW_32X) { + } else if (PicoIn.AHW & PAHW_32X) { sys_name = "32X"; } else { - sys_name = "MegaDrive"; + sys_name = "Mega Drive"; if ((Pico.m.hardware & 0xc0) == 0x80) sys_name = "Genesis"; } @@ -376,7 +416,7 @@ int emu_reload_rom(const char *rom_fname_in) movie_data = 0; } - if (!strcmp(ext, ".gmv")) + if (!strcasecmp(ext, ".gmv")) { // check for both gmv and rom int dummy; @@ -413,7 +453,7 @@ int emu_reload_rom(const char *rom_fname_in) get_ext(rom_fname, ext); lprintf("gmv loaded for %s\n", rom_fname); } - else if (!strcmp(ext, ".pat")) + else if (!strcasecmp(ext, ".pat")) { int dummy; PicoPatchLoad(rom_fname); @@ -430,7 +470,7 @@ int emu_reload_rom(const char *rom_fname_in) emu_make_path(carthw_path, "carthw.cfg", sizeof(carthw_path)); - media_type = PicoLoadMedia(rom_fname, carthw_path, + media_type = PicoLoadMedia(rom_fname, NULL, 0, carthw_path, find_bios, do_region_override); switch 
(media_type) { @@ -451,7 +491,7 @@ int emu_reload_rom(const char *rom_fname_in) } // make quirks visible in UI - if (PicoQuirks & PQUIRK_FORCE_6BTN) + if (PicoIn.quirks & PQUIRK_FORCE_6BTN) currentConfig.input_dev0 = PICO_INPUT_PAD_6BTN; menu_romload_end(); @@ -470,12 +510,12 @@ int emu_reload_rom(const char *rom_fname_in) PicoSetInputDevice(0, indev); PicoSetInputDevice(1, indev); - PicoOpt |= POPT_DIS_VDP_FIFO; // no VDP fifo timing + PicoIn.opt |= POPT_DIS_VDP_FIFO; // no VDP fifo timing if (movie_data[0xF] >= 'A') { if (movie_data[0x16] & 0x80) { - PicoRegionOverride = 8; + PicoIn.regionOverride = 8; } else { - PicoRegionOverride = 4; + PicoIn.regionOverride = 4; } PicoReset(); // TODO: bits 6 & 5 @@ -485,8 +525,11 @@ int emu_reload_rom(const char *rom_fname_in) } else { + PicoSetInputDevice(0, currentConfig.input_dev0); + PicoSetInputDevice(1, currentConfig.input_dev1); + system_announce(); - PicoOpt &= ~POPT_DIS_VDP_FIFO; + PicoIn.opt &= ~POPT_DIS_VDP_FIFO; } strncpy(rom_fname_loaded, rom_fname, sizeof(rom_fname_loaded)-1); @@ -530,19 +573,20 @@ out: int emu_swap_cd(const char *fname) { - cd_img_type cd_type; + enum cd_track_type cd_type; int ret = -1; cd_type = PicoCdCheck(fname, NULL); - if (cd_type != CIT_NOT_CD) - ret = Insert_CD(fname, cd_type); + if (cd_type != CT_UNKNOWN) + ret = cdd_load(fname, cd_type); if (ret != 0) { menu_update_msg("Load failed, invalid CD image?"); return 0; } strncpy(rom_fname_loaded, fname, sizeof(rom_fname_loaded)-1); - rom_fname_loaded[sizeof(rom_fname_loaded)-1] = 0; + rom_fname_loaded[sizeof(rom_fname_loaded) - 1] = 0; + return 1; } @@ -575,15 +619,19 @@ static void make_config_cfg(char *cfg_buff_512) void emu_prep_defconfig(void) { memset(&defaultConfig, 0, sizeof(defaultConfig)); - defaultConfig.EmuOpt = 0x9d | EOPT_EN_CD_LEDS; - defaultConfig.s_PicoOpt = POPT_EN_STEREO|POPT_EN_FM|POPT_EN_PSG|POPT_EN_Z80 | + defaultConfig.EmuOpt = EOPT_EN_SRAM | EOPT_EN_SOUND | EOPT_16BPP | + EOPT_EN_CD_LEDS | EOPT_GZIP_SAVES | 
EOPT_PICO_PEN; + defaultConfig.s_PicoOpt = POPT_EN_SNDFILTER|POPT_EN_GG_LCD|POPT_EN_YM2413 | + POPT_EN_STEREO|POPT_EN_FM|POPT_EN_PSG|POPT_EN_Z80 | POPT_EN_MCD_PCM|POPT_EN_MCD_CDDA|POPT_EN_MCD_GFX | POPT_EN_DRC|POPT_ACC_SPRITES | POPT_EN_32X|POPT_EN_PWM; defaultConfig.s_PsndRate = 44100; defaultConfig.s_PicoRegion = 0; // auto defaultConfig.s_PicoAutoRgnOrder = 0x184; // US, EU, JP + defaultConfig.s_hwSelect = PHWS_AUTO; defaultConfig.s_PicoCDBuffers = 0; + defaultConfig.s_PicoSndFilterAlpha = 0x10000 * 60 / 100; defaultConfig.confirm_save = EOPT_CONFIRM_SAVE; defaultConfig.Frameskip = -1; // auto defaultConfig.input_dev0 = PICO_INPUT_PAD_3BTN; @@ -594,6 +642,7 @@ void emu_prep_defconfig(void) defaultConfig.turbo_rate = 15; defaultConfig.msh2_khz = PICO_MSH2_HZ / 1000; defaultConfig.ssh2_khz = PICO_SSH2_HZ / 1000; + defaultConfig.max_skip = 4; // platform specific overrides pemu_prep_defconfig(); @@ -602,11 +651,12 @@ void emu_prep_defconfig(void) void emu_set_defconfig(void) { memcpy(&currentConfig, &defaultConfig, sizeof(currentConfig)); - PicoOpt = currentConfig.s_PicoOpt; - PsndRate = currentConfig.s_PsndRate; - PicoRegionOverride = currentConfig.s_PicoRegion; - PicoAutoRgnOrder = currentConfig.s_PicoAutoRgnOrder; - PicoCDBuffers = currentConfig.s_PicoCDBuffers; + PicoIn.opt = currentConfig.s_PicoOpt; + PicoIn.sndRate = currentConfig.s_PsndRate; + PicoIn.regionOverride = currentConfig.s_PicoRegion; + PicoIn.autoRgnOrder = currentConfig.s_PicoAutoRgnOrder; + PicoIn.hwSelect = currentConfig.s_hwSelect; + PicoIn.sndFilterAlpha = currentConfig.s_PicoSndFilterAlpha; } int emu_read_config(const char *rom_fname, int no_defaults) @@ -652,14 +702,9 @@ int emu_read_config(const char *rom_fname, int no_defaults) } pemu_validate_config(); + PicoIn.overclockM68k = currentConfig.overclock_68k; // some sanity checks -#ifdef PSP - /* TODO: mv to plat_validate_config() */ - if (currentConfig.CPUclock < 10 || currentConfig.CPUclock > 4096) currentConfig.CPUclock = 200; - if 
(currentConfig.gamma < -4 || currentConfig.gamma > 16) currentConfig.gamma = 0; - if (currentConfig.gamma2 < 0 || currentConfig.gamma2 > 2) currentConfig.gamma2 = 0; -#endif if (currentConfig.volume < 0 || currentConfig.volume > 99) currentConfig.volume = 50; @@ -729,17 +774,35 @@ void name(int x, int y, const char *text) \ } \ } -mk_text_out(emu_text_out8, unsigned char, 0xf0, g_screen_ptr, 1, g_screen_width) -mk_text_out(emu_text_out16, unsigned short, 0xffff, g_screen_ptr, 1, g_screen_width) +mk_text_out(emu_text_out8, unsigned char, 0xf0, g_screen_ptr, 1, g_screen_ppitch) +mk_text_out(emu_text_out16, unsigned short, 0xffff, g_screen_ptr, 1, g_screen_ppitch) mk_text_out(emu_text_out8_rot, unsigned char, 0xf0, - (char *)g_screen_ptr + (g_screen_width - 1) * g_screen_height, -g_screen_height, 1) + (char *)g_screen_ptr + (g_screen_ppitch - 1) * g_screen_height, -g_screen_height, 1) mk_text_out(emu_text_out16_rot, unsigned short, 0xffff, - (short *)g_screen_ptr + (g_screen_width - 1) * g_screen_height, -g_screen_height, 1) + (short *)g_screen_ptr + (g_screen_ppitch - 1) * g_screen_height, -g_screen_height, 1) #undef mk_text_out +void emu_osd_text16(int x, int y, const char *text) +{ + int len = strlen(text) * 8; + int i, h; -void update_movie(void) + len++; + if (x + len > g_screen_width) + len = g_screen_width - x; + + for (h = 0; h < 8; h++) { + unsigned short *p; + p = (unsigned short *)g_screen_ptr + + x + g_screen_ppitch * (y + h); + for (i = len; i > 0; i--, p++) + *p = (*p >> 2) & 0x39e7; + } + emu_text_out16(x, y, text); +} + +static void update_movie(void) { int offs = Pico.m.frame_count*3 + 0x40; if (offs+3 > movie_size) { @@ -749,20 +812,20 @@ void update_movie(void) lprintf("END OF MOVIE.\n"); } else { // MXYZ SACB RLDU - PicoPad[0] = ~movie_data[offs] & 0x8f; // ! 
SCBA RLDU - if(!(movie_data[offs] & 0x10)) PicoPad[0] |= 0x40; // C - if(!(movie_data[offs] & 0x20)) PicoPad[0] |= 0x10; // A - if(!(movie_data[offs] & 0x40)) PicoPad[0] |= 0x20; // B - PicoPad[1] = ~movie_data[offs+1] & 0x8f; // ! SCBA RLDU - if(!(movie_data[offs+1] & 0x10)) PicoPad[1] |= 0x40; // C - if(!(movie_data[offs+1] & 0x20)) PicoPad[1] |= 0x10; // A - if(!(movie_data[offs+1] & 0x40)) PicoPad[1] |= 0x20; // B - PicoPad[0] |= (~movie_data[offs+2] & 0x0A) << 8; // ! MZYX - if(!(movie_data[offs+2] & 0x01)) PicoPad[0] |= 0x0400; // X - if(!(movie_data[offs+2] & 0x04)) PicoPad[0] |= 0x0100; // Z - PicoPad[1] |= (~movie_data[offs+2] & 0xA0) << 4; // ! MZYX - if(!(movie_data[offs+2] & 0x10)) PicoPad[1] |= 0x0400; // X - if(!(movie_data[offs+2] & 0x40)) PicoPad[1] |= 0x0100; // Z + PicoIn.pad[0] = ~movie_data[offs] & 0x8f; // ! SCBA RLDU + if(!(movie_data[offs] & 0x10)) PicoIn.pad[0] |= 0x40; // C + if(!(movie_data[offs] & 0x20)) PicoIn.pad[0] |= 0x10; // A + if(!(movie_data[offs] & 0x40)) PicoIn.pad[0] |= 0x20; // B + PicoIn.pad[1] = ~movie_data[offs+1] & 0x8f; // ! SCBA RLDU + if(!(movie_data[offs+1] & 0x10)) PicoIn.pad[1] |= 0x40; // C + if(!(movie_data[offs+1] & 0x20)) PicoIn.pad[1] |= 0x10; // A + if(!(movie_data[offs+1] & 0x40)) PicoIn.pad[1] |= 0x20; // B + PicoIn.pad[0] |= (~movie_data[offs+2] & 0x0A) << 8; // ! MZYX + if(!(movie_data[offs+2] & 0x01)) PicoIn.pad[0] |= 0x0400; // X + if(!(movie_data[offs+2] & 0x04)) PicoIn.pad[0] |= 0x0100; // Z + PicoIn.pad[1] |= (~movie_data[offs+2] & 0xA0) << 4; // ! MZYX + if(!(movie_data[offs+2] & 0x10)) PicoIn.pad[1] |= 0x0400; // X + if(!(movie_data[offs+2] & 0x40)) PicoIn.pad[1] |= 0x0100; // Z } } @@ -791,9 +854,9 @@ char *emu_get_save_fname(int load, int is_sram, int slot, int *time) if (is_sram) { - strcpy(ext, (PicoAHW & PAHW_MCD) ? ".brm" : ".srm"); + strcpy(ext, (PicoIn.AHW & PAHW_MCD) ? ".brm" : ".srm"); romfname_ext(saveFname, sizeof(static_buff), - (PicoAHW & PAHW_MCD) ? 
"brm"PATH_SEP : "srm"PATH_SEP, ext); + (PicoIn.AHW & PAHW_MCD) ? "ms0:/PSP/SAVEDATA/SUGC/brm"PATH_SEP : "ms0:/PSP/SAVEDATA/SUGC/srm"PATH_SEP, ext); if (!load) return saveFname; @@ -867,24 +930,24 @@ int emu_save_load_game(int load, int sram) int sram_size; unsigned char *sram_data; int truncate = 1; - if (PicoAHW & PAHW_MCD) + if (PicoIn.AHW & PAHW_MCD) { - if (PicoOpt & POPT_EN_MCD_RAMCART) { + if (PicoIn.opt & POPT_EN_MCD_RAMCART) { sram_size = 0x12000; - sram_data = SRam.data; + sram_data = Pico.sv.data; if (sram_data) - memcpy32((int *)sram_data, (int *)Pico_mcd->bram, 0x2000/4); + memcpy(sram_data, Pico_mcd->bram, 0x2000); } else { sram_size = 0x2000; sram_data = Pico_mcd->bram; truncate = 0; // the .brm may contain RAM cart data after normal brm } } else { - sram_size = SRam.size; - sram_data = SRam.data; + sram_size = Pico.sv.size; + sram_data = Pico.sv.data; } if (sram_data == NULL) - return 0; // SRam forcefully disabled for this game + return 0; // cart saves forcefully disabled for this game if (load) { @@ -894,8 +957,8 @@ int emu_save_load_game(int load, int sram) ret = fread(sram_data, 1, sram_size, sramFile); ret = ret > 0 ? 
0 : -1; fclose(sramFile); - if ((PicoAHW & PAHW_MCD) && (PicoOpt&POPT_EN_MCD_RAMCART)) - memcpy32((int *)Pico_mcd->bram, (int *)sram_data, 0x2000/4); + if ((PicoIn.AHW & PAHW_MCD) && (PicoIn.opt&POPT_EN_MCD_RAMCART)) + memcpy(Pico_mcd->bram, sram_data, 0x2000); } else { // sram save needs some special processing // see if we have anything to save @@ -939,18 +1002,18 @@ void emu_set_fastforward(int set_on) static int set_Frameskip, set_EmuOpt, is_on = 0; if (set_on && !is_on) { - set_PsndOut = PsndOut; + set_PsndOut = PicoIn.sndOut; set_Frameskip = currentConfig.Frameskip; set_EmuOpt = currentConfig.EmuOpt; - PsndOut = NULL; + PicoIn.sndOut = NULL; currentConfig.Frameskip = 8; - currentConfig.EmuOpt &= ~4; - currentConfig.EmuOpt |= 0x40000; + currentConfig.EmuOpt &= ~EOPT_EN_SOUND; + currentConfig.EmuOpt |= EOPT_NO_FRMLIMIT; is_on = 1; emu_status_msg("FAST FORWARD"); } else if (!set_on && is_on) { - PsndOut = set_PsndOut; + PicoIn.sndOut = set_PsndOut; currentConfig.Frameskip = set_Frameskip; currentConfig.EmuOpt = set_EmuOpt; PsndRerate(1); @@ -980,22 +1043,76 @@ void emu_reset_game(void) reset_timing = 1; } -void run_events_pico(unsigned int events) -{ - int lim_x; +static int pico_page; +static int pico_w, pico_h; +static u16 *pico_overlay; - if (events & PEV_PICO_SWINP) { - pico_inp_mode++; - if (pico_inp_mode > 2) - pico_inp_mode = 0; - switch (pico_inp_mode) { - case 2: emu_status_msg("Input: Pen on Pad"); break; - case 1: emu_status_msg("Input: Pen on Storyware"); break; - case 0: emu_status_msg("Input: Joystick"); - PicoPicohw.pen_pos[0] = PicoPicohw.pen_pos[1] = 0x8000; - break; +static u16 *load_pico_overlay(int page, int w, int h) +{ + static const char *pic_exts[] = { "png", "PNG" }; + char *ext, *fname = NULL; + int extpos, i; + + if (pico_page == page && pico_w == w && pico_h == h) + return pico_overlay; + pico_page = page; + pico_w = w, pico_h = h; + + ext = strrchr(rom_fname_loaded, '.'); + extpos = ext ? 
ext-rom_fname_loaded : strlen(rom_fname_loaded); + strcpy(static_buff, rom_fname_loaded); + static_buff[extpos++] = '_'; + if (page < 0) { + static_buff[extpos++] = 'p'; + static_buff[extpos++] = 'a'; + static_buff[extpos++] = 'd'; + } else + static_buff[extpos++] = '0'+PicoPicohw.page; + static_buff[extpos++] = '.'; + + for (i = 0; i < ARRAY_SIZE(pic_exts); i++) { + strcpy(static_buff+extpos, pic_exts[i]); + if (access(static_buff, R_OK) == 0) { + printf("found Pico file: %s\n", static_buff); + fname = static_buff; + break; } } + + pico_overlay = realloc(pico_overlay, w*h*2); + memset(pico_overlay, 0, w*h*2); + if (!fname || !pico_overlay || readpng(pico_overlay, fname, READPNG_SCALE, w, h)) { + if (pico_overlay) + free(pico_overlay); + pico_overlay = NULL; + } + + return pico_overlay; +} + +void emu_pico_overlay(u16 *pd, int w, int h, int pitch) +{ + u16 *overlay = NULL; + int y, oh = h; + + // get overlay + if (pico_inp_mode == 1) { + oh = (w/2 < h ? w/2 : h); // storyware has squished h + overlay = load_pico_overlay(PicoPicohw.page, w, oh); + } else if (pico_inp_mode == 2) + overlay = load_pico_overlay(-1, w, oh); + + // copy overlay onto buffer + if (overlay) { + for (y = 0; y < oh; y++) + memcpy(pd + y*pitch, overlay + y*w, w*2); + if (y < h) + memset(pd + y*pitch, 0, w*2); + } +} + +void run_events_pico(unsigned int events) +{ if (events & PEV_PICO_PPREV) { PicoPicohw.page--; if (PicoPicohw.page < 0) @@ -1008,35 +1125,62 @@ void run_events_pico(unsigned int events) PicoPicohw.page = 6; emu_status_msg("Page %i", PicoPicohw.page); } + if (events & PEV_PICO_STORY) { + if (pico_inp_mode == 1) { + pico_inp_mode = 0; + emu_status_msg("Input: D-Pad"); + } else { + pico_inp_mode = 1; + emu_status_msg("Input: Pen on Storyware"); + } + } + if (events & PEV_PICO_PAD) { + if (pico_inp_mode == 2) { + pico_inp_mode = 0; + emu_status_msg("Input: D-Pad"); + } else { + pico_inp_mode = 2; + emu_status_msg("Input: Pen on Pad"); + } + } + if (events & PEV_PICO_PENST) { + 
PicoPicohw.pen_pos[0] ^= 0x8000; + PicoPicohw.pen_pos[1] ^= 0x8000; + emu_status_msg("Pen %s", PicoPicohw.pen_pos[0] & 0x8000 ? "Up" : "Down"); + } + if ((currentConfig.EmuOpt & EOPT_PICO_PEN) && + (PicoIn.pad[0]&0x20) && pico_inp_mode && pico_overlay) { + pico_inp_mode = 0; + emu_status_msg("Input: D-Pad"); + } if (pico_inp_mode == 0) return; /* handle other input modes */ - if (PicoPad[0] & 1) pico_pen_y--; - if (PicoPad[0] & 2) pico_pen_y++; - if (PicoPad[0] & 4) pico_pen_x--; - if (PicoPad[0] & 8) pico_pen_x++; - PicoPad[0] &= ~0x0f; // release UDLR + if (PicoIn.pad[0] & 1) pico_pen_y--; + if (PicoIn.pad[0] & 2) pico_pen_y++; + if (PicoIn.pad[0] & 4) pico_pen_x--; + if (PicoIn.pad[0] & 8) pico_pen_x++; + PicoIn.pad[0] &= ~0x0f; // release UDLR - lim_x = (Pico.video.reg[12]&1) ? 319 : 255; - if (pico_pen_y < 8) - pico_pen_y = 8; - if (pico_pen_y > 224 - PICO_PEN_ADJUST_Y) - pico_pen_y = 224 - PICO_PEN_ADJUST_Y; - if (pico_pen_x < 0) - pico_pen_x = 0; - if (pico_pen_x > lim_x - PICO_PEN_ADJUST_X) - pico_pen_x = lim_x - PICO_PEN_ADJUST_X; + /* cursor position, cursor drawing must not cross screen borders */ + if (pico_pen_y < PICO_PEN_ADJUST_Y) + pico_pen_y = PICO_PEN_ADJUST_Y; + if (pico_pen_y > 223-1 - PICO_PEN_ADJUST_Y) + pico_pen_y = 223-1 - PICO_PEN_ADJUST_Y; + if (pico_pen_x < PICO_PEN_ADJUST_X) + pico_pen_x = PICO_PEN_ADJUST_X; + if (pico_pen_x > 319-1 - PICO_PEN_ADJUST_X) + pico_pen_x = 319-1 - PICO_PEN_ADJUST_X; - PicoPicohw.pen_pos[0] = pico_pen_x; - if (!(Pico.video.reg[12] & 1)) - PicoPicohw.pen_pos[0] += pico_pen_x / 4; - PicoPicohw.pen_pos[0] += 0x3c; - PicoPicohw.pen_pos[1] = pico_inp_mode == 1 ? (0x2f8 + pico_pen_y) : (0x1fc + pico_pen_y); + PicoPicohw.pen_pos[0] &= 0x8000; + PicoPicohw.pen_pos[1] &= 0x8000; + PicoPicohw.pen_pos[0] |= 0x03c + pico_pen_x; + PicoPicohw.pen_pos[1] |= (pico_inp_mode == 1 ? 
0x2f8 : 0x1fc) + pico_pen_y; } -static void do_turbo(int *pad, int acts) +static void do_turbo(unsigned short *pad, int acts) { static int turbo_pad = 0; static unsigned char turbo_cnt[3] = { 0, 0, 0 }; @@ -1073,9 +1217,9 @@ static void run_events_ui(unsigned int which) char tmp[64]; int keys, len; - strcpy(tmp, (which & PEV_STATE_LOAD) ? "LOAD STATE?" : "OVERWRITE SAVE?"); + strcpy(tmp, (which & PEV_STATE_LOAD) ? "LOAD STATE? " : "OVERWRITE SAVE? "); len = strlen(tmp); - nm = in_get_key_name(-1, -PBTN_MA3); + nm = in_get_key_name(-1, -PBTN_MOK); snprintf(tmp + len, sizeof(tmp) - len, "(%s=yes, ", nm); len = strlen(tmp); nm = in_get_key_name(-1, -PBTN_MBACK); @@ -1084,15 +1228,16 @@ static void run_events_ui(unsigned int which) plat_status_msg_busy_first(tmp); in_set_config_int(0, IN_CFG_BLOCKING, 1); - while (in_menu_wait_any(NULL, 50) & (PBTN_MA3|PBTN_MBACK)) + while (in_menu_wait_any(NULL, 50) & (PBTN_MOK | PBTN_MBACK)) ; - while ( !((keys = in_menu_wait_any(NULL, 50)) & (PBTN_MA3|PBTN_MBACK)) ) + while ( !((keys = in_menu_wait_any(NULL, 50)) & (PBTN_MOK | PBTN_MBACK))) ; if (keys & PBTN_MBACK) do_it = 0; - while (in_menu_wait_any(NULL, 50) & (PBTN_MA3|PBTN_MBACK)) + while (in_menu_wait_any(NULL, 50) & (PBTN_MOK | PBTN_MBACK)) ; in_set_config_int(0, IN_CFG_BLOCKING, 0); + plat_status_msg_clear(); } if (do_it) { plat_status_msg_busy_first((which & PEV_STATE_LOAD) ? "LOADING STATE" : "SAVING STATE"); @@ -1120,6 +1265,8 @@ static void run_events_ui(unsigned int which) emu_status_msg("SAVE SLOT %i [%s]", state_slot, emu_check_save_file(state_slot, NULL) ? 
"USED" : "FREE"); } + if (which & PEV_RESET) + emu_reset_game(); if (which & PEV_MENU) engineState = PGS_Menu; } @@ -1128,21 +1275,29 @@ void emu_update_input(void) { static int prev_events = 0; int actions[IN_BINDTYPE_COUNT] = { 0, }; - int pl_actions[2]; + int pl_actions[4]; int events; in_update(actions); pl_actions[0] = actions[IN_BINDTYPE_PLAYER12]; pl_actions[1] = actions[IN_BINDTYPE_PLAYER12] >> 16; + pl_actions[2] = actions[IN_BINDTYPE_PLAYER34]; + pl_actions[3] = actions[IN_BINDTYPE_PLAYER34] >> 16; - PicoPad[0] = pl_actions[0] & 0xfff; - PicoPad[1] = pl_actions[1] & 0xfff; + PicoIn.pad[0] = pl_actions[0] & 0xfff; + PicoIn.pad[1] = pl_actions[1] & 0xfff; + PicoIn.pad[2] = pl_actions[2] & 0xfff; + PicoIn.pad[3] = pl_actions[3] & 0xfff; if (pl_actions[0] & 0x7000) - do_turbo(&PicoPad[0], pl_actions[0]); + do_turbo(&PicoIn.pad[0], pl_actions[0]); if (pl_actions[1] & 0x7000) - do_turbo(&PicoPad[1], pl_actions[1]); + do_turbo(&PicoIn.pad[1], pl_actions[1]); + if (pl_actions[2] & 0x7000) + do_turbo(&PicoIn.pad[2], pl_actions[2]); + if (pl_actions[3] & 0x7000) + do_turbo(&PicoIn.pad[3], pl_actions[3]); events = actions[IN_BINDTYPE_EMU] & PEV_MASK; @@ -1158,7 +1313,7 @@ void emu_update_input(void) events &= ~prev_events; - if (PicoAHW == PAHW_PICO) + if (PicoIn.AHW == PAHW_PICO) run_events_pico(events); if (events) run_events_ui(events); @@ -1173,29 +1328,33 @@ static void mkdir_path(char *path_with_reserve, int pos, const char *name) strcpy(path_with_reserve + pos, name); if (plat_is_dir(path_with_reserve)) return; - if (mkdir(path_with_reserve, 0777) < 0) + if (mkdir(path_with_reserve, 0755) < 0) lprintf("failed to create: %s\n", path_with_reserve); } -void emu_cmn_forced_frame(int no_scale, int do_emu) +void emu_cmn_forced_frame(int no_scale, int do_emu, void *buf) { - int po_old = PicoOpt; + int po_old = PicoIn.opt; + int y; - memset32(g_screen_ptr, 0, g_screen_width * g_screen_height * 2 / 4); + for (y = 0; y < g_screen_height; y++) + memset32((short 
*)g_screen_ptr + g_screen_ppitch * y, 0, + g_screen_width * 2 / 4); - PicoOpt &= ~POPT_ALT_RENDERER; - PicoOpt |= POPT_ACC_SPRITES; - if (!no_scale) - PicoOpt |= POPT_EN_SOFTSCALE; + PicoIn.opt &= ~(POPT_ALT_RENDERER|POPT_EN_SOFTSCALE); + PicoIn.opt |= POPT_ACC_SPRITES; + if (!no_scale && currentConfig.scaling) + PicoIn.opt |= POPT_EN_SOFTSCALE; PicoDrawSetOutFormat(PDF_RGB555, 1); + PicoDrawSetOutBuf(buf, g_screen_ppitch * 2); Pico.m.dirtyPal = 1; if (do_emu) PicoFrame(); else PicoFrameDrawOnly(); - PicoOpt = po_old; + PicoIn.opt = po_old; } void emu_init(void) @@ -1218,10 +1377,10 @@ void emu_init(void) /* make dirs for saves */ pos = plat_get_root_dir(path, sizeof(path) - 4); - mkdir_path(path, pos, "mds"); - mkdir_path(path, pos, "srm"); - mkdir_path(path, pos, "brm"); - mkdir_path(path, pos, "cfg"); + mkdir_path(path, pos, "ms0:/PSP/SAVEDATA/SUGC/mds"); + mkdir_path(path, pos, "ms0:/PSP/SAVEDATA/SUGC/srm"); + mkdir_path(path, pos, "ms0:/PSP/SAVEDATA/SUGC/brm"); + mkdir_path(path, pos, "ms0:/PSP/SAVEDATA/SUGC/cfg"); pprof_init(); @@ -1229,9 +1388,9 @@ void emu_init(void) config_readlrom(path); PicoInit(); - PicoMessage = plat_status_msg_busy_next; - PicoMCDopenTray = emu_tray_open; - PicoMCDcloseTray = emu_tray_close; + PicoIn.osdMessage = plat_status_msg_busy_next; + PicoIn.mcdTrayOpen = emu_tray_open; + PicoIn.mcdTrayClose = emu_tray_close; sndout_init(); } @@ -1239,9 +1398,9 @@ void emu_init(void) void emu_finish(void) { // save SRAM - if ((currentConfig.EmuOpt & EOPT_EN_SRAM) && SRam.changed) { + if ((currentConfig.EmuOpt & EOPT_EN_SRAM) && Pico.sv.changed) { emu_save_load_game(0, 1); - SRam.changed = 0; + Pico.sv.changed = 0; } if (!(currentConfig.EmuOpt & EOPT_NO_AUTOSVCFG)) { @@ -1261,26 +1420,30 @@ void emu_finish(void) static void snd_write_nonblocking(int len) { - sndout_write_nb(PsndOut, len); + sndout_write_nb(PicoIn.sndOut, len); } void emu_sound_start(void) { - PsndOut = NULL; + PicoIn.sndOut = NULL; + // auto-select rate? 
+ if (PicoIn.sndRate > 52000 && PicoIn.sndRate < 54000) + PicoIn.sndRate = YM2612_NATIVE_RATE(); if (currentConfig.EmuOpt & EOPT_EN_SOUND) { - int is_stereo = (PicoOpt & POPT_EN_STEREO) ? 1 : 0; + int is_stereo = (PicoIn.opt & POPT_EN_STEREO) ? 1 : 0; + memset(sndBuffer, 0, sizeof(sndBuffer)); + PicoIn.sndOut = sndBuffer; PsndRerate(Pico.m.frame_count ? 1 : 0); printf("starting audio: %i len: %i stereo: %i, pal: %i\n", - PsndRate, PsndLen, is_stereo, Pico.m.pal); - sndout_start(PsndRate, is_stereo); - PicoWriteSound = snd_write_nonblocking; + PicoIn.sndRate, Pico.snd.len, is_stereo, Pico.m.pal); + + sndout_start(PicoIn.sndRate, is_stereo); + PicoIn.writeSound = snd_write_nonblocking; plat_update_volume(0, 0); - memset(sndBuffer, 0, sizeof(sndBuffer)); - PsndOut = sndBuffer; } } @@ -1318,16 +1481,17 @@ static void emu_loop_prep(void) } /* our tick here is 1 us right now */ -#define ms_to_ticks(x) (unsigned int)(x * 1000) -#define get_ticks() plat_get_ticks_us() +#define ms_to_ticks(x) (int)(x * 1000) +#define get_ticks() plat_get_ticks_us() +#define vsync_delay ms_to_ticks(1) void emu_loop(void) { int frames_done, frames_shown; /* actual frames for fps counter */ - int target_frametime_x3; - unsigned int timestamp_x3 = 0; - unsigned int timestamp_aim_x3 = 0; - unsigned int timestamp_fps_x3 = 0; + int target_frametime; + unsigned int timestamp = 0; + unsigned int timestamp_aim = 0; + unsigned int timestamp_fps = 0; char *notice_msg = NULL; char fpsbuff[24]; int fskip_cnt = 0; @@ -1336,19 +1500,15 @@ void emu_loop(void) PicoLoopPrepare(); - // prepare CD buffer - if (PicoAHW & PAHW_MCD) - PicoCDBufferInit(); - plat_video_loop_prepare(); emu_loop_prep(); pemu_sound_start(); /* number of ticks per frame */ if (Pico.m.pal) - target_frametime_x3 = 3 * ms_to_ticks(1000) / 50; + target_frametime = ms_to_ticks(1000) / 50; else - target_frametime_x3 = 3 * ms_to_ticks(1000) / 60; + target_frametime = ms_to_ticks(1000) / 60; reset_timing = 1; frames_done = frames_shown = 0; @@ 
-1364,26 +1524,26 @@ void emu_loop(void) if (reset_timing) { reset_timing = 0; plat_video_wait_vsync(); - timestamp_aim_x3 = get_ticks() * 3; - timestamp_fps_x3 = timestamp_aim_x3; + timestamp_aim = get_ticks(); + timestamp_fps = timestamp_aim; fskip_cnt = 0; } else if (currentConfig.EmuOpt & EOPT_NO_FRMLIMIT) { - timestamp_aim_x3 = get_ticks() * 3; + timestamp_aim = get_ticks(); } - timestamp_x3 = get_ticks() * 3; + timestamp = get_ticks(); // show notice_msg message? if (notice_msg_time != 0) { static int noticeMsgSum; - if (timestamp_x3 - ms_to_ticks(notice_msg_time) * 3 - > ms_to_ticks(STATUS_MSG_TIMEOUT) * 3) + if (timestamp - ms_to_ticks(notice_msg_time) + > ms_to_ticks(STATUS_MSG_TIMEOUT)) { notice_msg_time = 0; - plat_status_msg_clear(); notice_msg = NULL; + plat_status_msg_clear(); } else { int sum = noticeMsg[0] + noticeMsg[1] + noticeMsg[2]; @@ -1396,7 +1556,7 @@ void emu_loop(void) } // second changed? - if (timestamp_x3 - timestamp_fps_x3 >= ms_to_ticks(1000) * 3) + if (timestamp - timestamp_fps >= ms_to_ticks(1000)) { #ifdef BENCHMARK static int bench = 0, bench_fps = 0, bench_fps_s = 0, bfp = 0, bf[4]; @@ -1411,16 +1571,16 @@ void emu_loop(void) printf("%s\n", fpsbuff); #else if (currentConfig.EmuOpt & EOPT_SHOW_FPS) - sprintf(fpsbuff, "%02i/%02i ", frames_shown, frames_done); + snprintf(fpsbuff, 8, "%02i/%02i ", frames_shown, frames_done); #endif frames_shown = frames_done = 0; - timestamp_fps_x3 += ms_to_ticks(1000) * 3; + timestamp_fps += ms_to_ticks(1000); } #ifdef PFRAMES sprintf(fpsbuff, "%i", Pico.m.frame_count); #endif - diff = timestamp_aim_x3 - timestamp_x3; + diff = timestamp_aim - timestamp; if (currentConfig.Frameskip >= 0) // frameskip enabled (or 0) { @@ -1432,26 +1592,32 @@ void emu_loop(void) fskip_cnt = 0; } } - else if (diff < -target_frametime_x3) + else if (diff < -target_frametime) { /* no time left for this frame - skip */ - /* limit auto frameskip to 8 */ - if (frames_done / 8 <= frames_shown) + /* limit auto frameskip to 
max_skip */ + if (fskip_cnt < currentConfig.max_skip) { + fskip_cnt++; skip = 1; - } + } + else { + fskip_cnt = 0; + } + } else + fskip_cnt = 0; // don't go in debt too much - while (diff < -target_frametime_x3 * 3) { - timestamp_aim_x3 += target_frametime_x3; - diff = timestamp_aim_x3 - timestamp_x3; + while (diff < -target_frametime * 3) { + timestamp_aim += target_frametime; + diff = timestamp_aim - timestamp; } emu_update_input(); if (skip) { - int do_audio = diff > -target_frametime_x3 * 2; - PicoSkipFrame = do_audio ? 1 : 2; + int do_audio = diff > -target_frametime * 2; + PicoIn.skipFrame = do_audio ? 1 : 2; PicoFrame(); - PicoSkipFrame = 0; + PicoIn.skipFrame = 0; } else { PicoFrame(); @@ -1459,7 +1625,7 @@ void emu_loop(void) frames_shown++; } frames_done++; - timestamp_aim_x3 += target_frametime_x3; + timestamp_aim += target_frametime; if (!skip && !flip_after_sync) plat_video_flip(); @@ -1469,18 +1635,18 @@ void emu_loop(void) && !(currentConfig.EmuOpt & (EOPT_NO_FRMLIMIT|EOPT_EXT_FRMLIMIT))) { unsigned int timestamp = get_ticks(); - diff = timestamp_aim_x3 - timestamp * 3; + diff = timestamp_aim - timestamp; // sleep or vsync if we are still too fast - if (diff > target_frametime_x3 && (currentConfig.EmuOpt & EOPT_VSYNC)) { + if (diff > target_frametime + vsync_delay && (currentConfig.EmuOpt & EOPT_VSYNC)) { // we are too fast plat_video_wait_vsync(); timestamp = get_ticks(); - diff = timestamp * 3 - timestamp_aim_x3; + diff = timestamp_aim - timestamp; } - if (diff > target_frametime_x3) { + if (diff > target_frametime + vsync_delay) { // still too fast - plat_wait_till_us(timestamp + (diff - target_frametime_x3) / 3); + plat_wait_till_us(timestamp + (diff - target_frametime)); } } @@ -1493,18 +1659,12 @@ void emu_loop(void) emu_set_fastforward(0); // save SRAM - if ((currentConfig.EmuOpt & EOPT_EN_SRAM) && SRam.changed) { + if ((currentConfig.EmuOpt & EOPT_EN_SRAM) && Pico.sv.changed) { plat_status_msg_busy_first("Writing SRAM/BRAM..."); 
emu_save_load_game(0, 1); - SRam.changed = 0; + Pico.sv.changed = 0; } pemu_loop_end(); emu_sound_stop(); - - // pemu_loop_end() might want to do 1 frame for bg image, - // so free CD buffer here - if (PicoAHW & PAHW_MCD) - PicoCDBufferFree(); } - diff --git a/platform/common/emu.h b/platform/common/emu.h index 8f8f61f4..c477883e 100644 --- a/platform/common/emu.h +++ b/platform/common/emu.h @@ -16,6 +16,7 @@ extern void *g_screen_ptr; extern int g_screen_width; extern int g_screen_height; +extern int g_screen_ppitch; // pitch in pixels #define EOPT_EN_SRAM (1<<0) #define EOPT_SHOW_FPS (1<<1) @@ -33,11 +34,30 @@ extern int g_screen_height; #define EOPT_NO_FRMLIMIT (1<<18) #define EOPT_WIZ_TEAR_FIX (1<<19) #define EOPT_EXT_FRMLIMIT (1<<20) // no internal frame limiter (limited by snd, etc) +#define EOPT_PICO_PEN (1<<21) enum { EOPT_SCALE_NONE = 0, - EOPT_SCALE_SW, + // linux, GP2X: + EOPT_SCALE_SW = 1, EOPT_SCALE_HW, + // PSP horiz: + EOPT_SCALE_43 = 1, // 4:3 screen + EOPT_SCALE_STRETCH, // stretched to between _43 and _WIDE + EOPT_SCALE_WIDE, // stretched to match display width + // PSP vert: + EOPT_VSCALE_FULL = 1, // TV height scaled to screen height + EOPT_VSCALE_NOBORDER, // VDP area scaled to screen height +}; + +enum { + EOPT_FILTER_NONE = 0, + // PSP texture filtering + EOPT_FILTER_BILINEAR = 1, + // software scalers + EOPT_FILTER_SMOOTHER = 1, + EOPT_FILTER_BILINEAR1, + EOPT_FILTER_BILINEAR2, }; enum { @@ -53,7 +73,9 @@ typedef struct _currentConfig_t { int s_PsndRate; int s_PicoRegion; int s_PicoAutoRgnOrder; + int s_hwSelect; int s_PicoCDBuffers; + int s_PicoSndFilterAlpha; int Frameskip; int input_dev0; int input_dev1; @@ -61,19 +83,20 @@ typedef struct _currentConfig_t { int CPUclock; int volume; int gamma; - int scaling; // gp2x: EOPT_SCALE_*; psp: bilinear filtering + int scaling; // EOPT_SCALE_* int vscaling; int rotation; // for UIQ - float scale; // psp: screen scale - float hscale32, hscale40; // psp: horizontal scale int gamma2; // psp: black 
level int turbo_rate; int renderer; int renderer32x; - int filter; // pandora + int filter; // EOPT_FILTER_* video filter + int ghosting; int analog_deadzone; int msh2_khz; int ssh2_khz; + int overclock_68k; + int max_skip; } currentConfig_t; extern currentConfig_t currentConfig, defaultConfig; @@ -84,8 +107,8 @@ extern unsigned char *movie_data; extern int reset_timing; extern int flip_after_sync; -#define PICO_PEN_ADJUST_X 4 -#define PICO_PEN_ADJUST_Y 2 +#define PICO_PEN_ADJUST_X 1 +#define PICO_PEN_ADJUST_Y 1 extern int pico_pen_x, pico_pen_y; extern int pico_inp_mode; @@ -129,19 +152,23 @@ void emu_text_out16(int x, int y, const char *text); void emu_text_out8_rot (int x, int y, const char *text); void emu_text_out16_rot(int x, int y, const char *text); +void emu_osd_text16(int x, int y, const char *text); + void emu_make_path(char *buff, const char *end, int size); void emu_update_input(void); void emu_get_game_name(char *str150); void emu_set_fastforward(int set_on); void emu_status_msg(const char *format, ...); +void emu_pico_overlay(unsigned short *pd, int w, int h, int pitch); + /* default sound code */ void emu_sound_start(void); void emu_sound_stop(void); void emu_sound_wait(void); /* used by some (but not all) platforms */ -void emu_cmn_forced_frame(int no_scale, int do_emu); +void emu_cmn_forced_frame(int no_scale, int do_emu, void *buf); /* stuff to be implemented by platform code */ extern const char *renderer_names[]; @@ -156,6 +183,7 @@ void pemu_finalize_frame(const char *fps, const char *notice_msg); void pemu_sound_start(void); +int plat_parse_arg(int argc, char *argv[], int *x); void plat_early_init(void); void plat_init(void); void plat_finish(void); @@ -167,9 +195,16 @@ void plat_status_msg_clear(void); void plat_video_toggle_renderer(int change, int menu_call); void plat_video_loop_prepare(void); +void plat_video_set_buffer(void *); void plat_update_volume(int has_changed, int is_up); +/* should be in libpicofe/plat.h */ +void 
plat_video_clear_status(void); +void plat_video_clear_buffers(void); +void plat_video_set_size(int w, int h); +void plat_video_set_shadow(int w, int h); + #ifdef __cplusplus } // extern "C" #endif diff --git a/platform/common/helix/Makefile b/platform/common/helix/Makefile new file mode 100644 index 00000000..9f3f4ef0 --- /dev/null +++ b/platform/common/helix/Makefile @@ -0,0 +1,43 @@ +CROSS_COMPILE ?= arm-linux-gnueabi- + +CC = $(CROSS_COMPILE)gcc +AS = $(CROSS_COMPILE)as +AR = $(CROSS_COMPILE)ar +TOOLCHAIN = $(notdir $(CROSS_COMPILE)) +LIBGCC ?= ${HOME}/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1/libgcc.a + +CFLAGS += -Ipub -O2 -Wall -fstrict-aliasing -ffast-math +ifneq ($(findstring arm-,$(TOOLCHAIN)),) +CFLAGS += -mcpu=arm940t -mtune=arm940t -mfloat-abi=soft -mfpu=fpa -mabi=apcs-gnu -mno-thumb-interwork +ASFLAGS = -mcpu=arm940t -mfloat-abi=soft -mfpu=fpa -mabi=apcs-gnu +OBJS += real/arm/asmpoly_gcc.o +else +CFLAGS += -m32 +ASFLAGS += -m32 +OBJS += real/polyphase.o +endif + +LIB = $(TOOLCHAIN)helix_mp3.a +SHLIB = $(TOOLCHAIN)helix_mp3.so + +all: $(LIB) $(SHLIB) + + +OBJS += mp3dec.o mp3tabs.o +#OBJS += ipp/bitstream.o ipp/buffers.o ipp/dequant.o ipp/huffman.o ipp/imdct.o ipp/subband.o +OBJS += real/bitstream.o real/buffers.o real/dct32.o real/dequant.o real/dqchan.o real/huffman.o +OBJS += real/hufftabs.o real/imdct.o real/scalfact.o real/stproc.o real/subband.o real/trigtabs.o + +OBJS += lib.o + +real/arm/asmpoly_gcc.o: real/arm/asmpoly_gcc.s + $(CC) -o $@ $(ASFLAGS) -c $< + +$(LIB) : $(OBJS) + $(AR) r $@ $^ +$(SHLIB) : $(OBJS) $(LIBGCC) + $(CC) -o $@ -nostdlib -shared $(CFLAGS) $^ + +clean: + $(RM) -f $(OBJS) + diff --git a/platform/common/helix/lib.c b/platform/common/helix/lib.c new file mode 100644 index 00000000..d2b05898 --- /dev/null +++ b/platform/common/helix/lib.c @@ -0,0 +1,57 @@ +#include +#include + +// libgcc has this with gcc 4.x +void raise(int sig) +{ +} + +// very limited heap functions for helix decoder + +static char 
heap[65000] __attribute__((aligned(16))); +static long heap_offs; + +void __malloc_init(void) +{ + heap_offs = 0; +} + +void *malloc(size_t size) +{ + void *chunk = heap + heap_offs; + size = (size+15) & ~15; + if (heap_offs + size > sizeof(heap)) + return NULL; + else { + heap_offs += size; + return chunk; + } +} + +void free(void *chunk) +{ + if (chunk == heap) + heap_offs = 0; +} + +#if 0 +void *memcpy (void *dest, const void *src, size_t n) +{ + char *_dest = dest; + const char *_src = src; + while (n--) *_dest++ = *_src++; + return dest; +} + +void *memmove (void *dest, const void *src, size_t n) +{ + char *_dest = dest+n; + const char *_src = src+n; + if (dest <= src || dest >= _src) + return memcpy(dest, src, n); + while (n--) *--_dest = *--_src; + return dest; +} +#else +#include "../memcpy.c" +#endif diff --git a/platform/common/host_dasm.c b/platform/common/host_dasm.c new file mode 100644 index 00000000..b0f37d72 --- /dev/null +++ b/platform/common/host_dasm.c @@ -0,0 +1,118 @@ +/* + * DRC host disassembler interface for MIPS/ARM32 for use without binutils + * (C) irixxxx, 2018-2021 + */ +#include +#include +#include +#include + +#if defined __mips__ +#include "dismips.c" +#define disasm dismips +#elif defined __arm__ +#include "disarm.c" +#define disasm disarm +#endif + +/* symbols */ +typedef struct { const char *name; void *value; } asymbol; + +static asymbol **symbols; +static long symcount, symstorage = 8; + +static const char *lookup_name(void *addr) +{ + asymbol **sptr = symbols; + int i; + + for (i = 0; i < symcount; i++) { + asymbol *sym = *sptr++; + + if (addr == sym->value) + return sym->name; + } + + return NULL; +} + +#ifdef disasm +void host_dasm(void *addr, int len) +{ + void *end = (char *)addr + len; + const char *name; + char buf[64]; + unsigned long insn, symaddr; + + while (addr < end) { + name = lookup_name(addr); + if (name != NULL) + printf("%s:\n", name); + + insn = *(unsigned long *)addr; + printf(" %08lx %08lx ", (long)addr, 
insn); + if(disasm((uintptr_t)addr, insn, buf, sizeof(buf), &symaddr)) + { + if (symaddr) + name = lookup_name((void *)symaddr); + if (symaddr && name) + printf("%s <%s>\n", buf, name); + else if (symaddr && !name) + printf("%s \n", buf); + else + printf("%s\n", buf); + } else + printf("unknown (0x%08lx)\n", insn); + addr = (char *)addr + sizeof(long); + } +} +#else +void host_dasm(void *addr, int len) +{ + uint8_t *end = (uint8_t *)addr + len; + char buf[64]; + uint8_t *p = addr; + int i = 0, o = 0; + + o = snprintf(buf, sizeof(buf), "%p: ", p); + while (p < end) { + o += snprintf(buf+o, sizeof(buf)-o, "%02x ", *p++); + if (++i >= 16) { + buf[o] = '\0'; + printf("%s\n", buf); + o = snprintf(buf, sizeof(buf), "%p: ", p); + i = 0; + } + } + if (i) { + buf[o] = '\0'; + printf("%s\n", buf); + } +} +#endif + +void host_dasm_new_symbol_(void *addr, const char *name) +{ + asymbol *sym, **tmp; + + if (symbols == NULL) + symbols = malloc(symstorage); + if (symstorage <= symcount * sizeof(symbols[0])) { + tmp = realloc(symbols, symstorage * 2); + if (tmp == NULL) + return; + symstorage *= 2; + symbols = tmp; + } + + symbols[symcount] = calloc(sizeof(*symbols[0]), 1); + if (symbols[symcount] == NULL) + return; + + // a HACK (should use correct section), but ohwell + sym = symbols[symcount]; + sym->value = addr; + sym->name = name; + symcount++; +} + diff --git a/platform/common/input_pico.h b/platform/common/input_pico.h index c2730b52..2f3de418 100644 --- a/platform/common/input_pico.h +++ b/platform/common/input_pico.h @@ -27,7 +27,10 @@ #define PEVB_FF 22 #define PEVB_PICO_PNEXT 21 #define PEVB_PICO_PPREV 20 -#define PEVB_PICO_SWINP 19 +#define PEVB_PICO_STORY 19 +#define PEVB_PICO_PAD 18 +#define PEVB_PICO_PENST 17 +#define PEVB_RESET 16 #define PEV_VOL_DOWN (1 << PEVB_VOL_DOWN) #define PEV_VOL_UP (1 << PEVB_VOL_UP) @@ -40,8 +43,11 @@ #define PEV_FF (1 << PEVB_FF) #define PEV_PICO_PNEXT (1 << PEVB_PICO_PNEXT) #define PEV_PICO_PPREV (1 << PEVB_PICO_PPREV) -#define 
PEV_PICO_SWINP (1 << PEVB_PICO_SWINP) +#define PEV_PICO_STORY (1 << PEVB_PICO_STORY) +#define PEV_PICO_PAD (1 << PEVB_PICO_PAD) +#define PEV_PICO_PENST (1 << PEVB_PICO_PENST) +#define PEV_RESET (1 << PEVB_RESET) -#define PEV_MASK 0x7ff80000 +#define PEV_MASK 0x7fff0000 #endif /* INCLUDE_c48097f3ff2a6a9af1cce8fd7a9b3f0c */ diff --git a/platform/common/inputmap_kbd.c b/platform/common/inputmap_kbd.c new file mode 100644 index 00000000..8fbb29f8 --- /dev/null +++ b/platform/common/inputmap_kbd.c @@ -0,0 +1,73 @@ +#include +#include + +#include "../libpicofe/input.h" +#include "../libpicofe/in_sdl.h" +#include "../libpicofe/plat.h" +#include "../common/input_pico.h" +#include "../common/plat_sdl.h" + +const struct in_default_bind _in_sdl_defbinds[] = { + { SDLK_UP, IN_BINDTYPE_PLAYER12, GBTN_UP }, + { SDLK_DOWN, IN_BINDTYPE_PLAYER12, GBTN_DOWN }, + { SDLK_LEFT, IN_BINDTYPE_PLAYER12, GBTN_LEFT }, + { SDLK_RIGHT, IN_BINDTYPE_PLAYER12, GBTN_RIGHT }, + { SDLK_z, IN_BINDTYPE_PLAYER12, GBTN_A }, + { SDLK_x, IN_BINDTYPE_PLAYER12, GBTN_B }, + { SDLK_c, IN_BINDTYPE_PLAYER12, GBTN_C }, + { SDLK_a, IN_BINDTYPE_PLAYER12, GBTN_X }, + { SDLK_s, IN_BINDTYPE_PLAYER12, GBTN_Y }, + { SDLK_d, IN_BINDTYPE_PLAYER12, GBTN_Z }, + { SDLK_RETURN, IN_BINDTYPE_PLAYER12, GBTN_START }, + { SDLK_f, IN_BINDTYPE_PLAYER12, GBTN_MODE }, + { SDLK_ESCAPE, IN_BINDTYPE_EMU, PEVB_MENU }, + { SDLK_TAB, IN_BINDTYPE_EMU, PEVB_RESET }, + { SDLK_F1, IN_BINDTYPE_EMU, PEVB_STATE_SAVE }, + { SDLK_F2, IN_BINDTYPE_EMU, PEVB_STATE_LOAD }, + { SDLK_F3, IN_BINDTYPE_EMU, PEVB_SSLOT_PREV }, + { SDLK_F4, IN_BINDTYPE_EMU, PEVB_SSLOT_NEXT }, + { SDLK_F5, IN_BINDTYPE_EMU, PEVB_SWITCH_RND }, + { SDLK_F6, IN_BINDTYPE_EMU, PEVB_PICO_PPREV }, + { SDLK_F7, IN_BINDTYPE_EMU, PEVB_PICO_PNEXT }, + { SDLK_F8, IN_BINDTYPE_EMU, PEVB_PICO_STORY }, + { SDLK_F9, IN_BINDTYPE_EMU, PEVB_PICO_PAD }, + { SDLK_F10, IN_BINDTYPE_EMU, PEVB_PICO_PENST }, + { SDLK_BACKSPACE, IN_BINDTYPE_EMU, PEVB_FF }, + { 0, 0, 0 } +}; +const struct in_default_bind 
*in_sdl_defbinds = _in_sdl_defbinds; + +const struct menu_keymap _in_sdl_key_map[] = { + { SDLK_UP, PBTN_UP }, + { SDLK_DOWN, PBTN_DOWN }, + { SDLK_LEFT, PBTN_LEFT }, + { SDLK_RIGHT, PBTN_RIGHT }, + { SDLK_RETURN, PBTN_MOK }, + { SDLK_ESCAPE, PBTN_MBACK }, + { SDLK_COMMA, PBTN_MA2 }, + { SDLK_PERIOD, PBTN_MA3 }, + { SDLK_TAB, PBTN_R }, + { SDLK_BACKSPACE, PBTN_L }, +}; +const int in_sdl_key_map_sz = sizeof(_in_sdl_key_map) / sizeof(_in_sdl_key_map[0]); +const struct menu_keymap *in_sdl_key_map = _in_sdl_key_map; + +const struct menu_keymap _in_sdl_joy_map[] = { + { SDLK_UP, PBTN_UP }, + { SDLK_DOWN, PBTN_DOWN }, + { SDLK_LEFT, PBTN_LEFT }, + { SDLK_RIGHT, PBTN_RIGHT }, + /* joystick */ + { SDLK_WORLD_0, PBTN_MOK }, + { SDLK_WORLD_1, PBTN_MBACK }, + { SDLK_WORLD_2, PBTN_MA2 }, + { SDLK_WORLD_3, PBTN_MA3 }, +}; +const int in_sdl_joy_map_sz = sizeof(_in_sdl_joy_map) / sizeof(_in_sdl_joy_map[0]); +const struct menu_keymap *in_sdl_joy_map = _in_sdl_joy_map; + +const char * const *in_sdl_key_names = NULL; + +void plat_target_setup_input(void) +{ +} diff --git a/platform/common/main.c b/platform/common/main.c index 52676676..8e6987e4 100644 --- a/platform/common/main.c +++ b/platform/common/main.c @@ -10,6 +10,9 @@ #include #include #include +#ifdef USE_SDL +#include +#endif #include "../libpicofe/input.h" #include "../libpicofe/plat.h" @@ -18,7 +21,6 @@ #include "version.h" #include - static int load_state_slot = -1; char **g_argv; @@ -26,7 +28,7 @@ void parse_cmd_line(int argc, char *argv[]) { int x, unrecognized = 0; - for (x = 1; x < argc; x++) + for (x = 1; x < argc && !unrecognized; x++) { if (argv[x][0] == '-') { @@ -45,15 +47,13 @@ void parse_cmd_line(int argc, char *argv[]) if (x+2 < argc) { pdb_net_connect(argv[x+1], argv[x+2]); x += 2; } } else { - unrecognized = 1; - break; + unrecognized = plat_parse_arg(argc, argv, &x); } } else { FILE *f = fopen(argv[x], "rb"); if (f) { fclose(f); rom_fname_reload = argv[x]; - engineState = PGS_ReloadRom; } else 
unrecognized = 1; @@ -82,21 +82,23 @@ int main(int argc, char *argv[]) //in_probe(); plat_target_init(); + if (argc > 1) + parse_cmd_line(argc, argv); + plat_init(); + menu_init(); emu_prep_defconfig(); // depends on input emu_read_config(NULL, 0); emu_init(); - menu_init(); - engineState = PGS_Menu; - - if (argc > 1) - parse_cmd_line(argc, argv); + engineState = rom_fname_reload ? PGS_ReloadRom : PGS_Menu; + plat_video_menu_enter(0); if (engineState == PGS_ReloadRom) { + plat_video_menu_begin(); if (emu_reload_rom(rom_fname_reload)) { engineState = PGS_Running; if (load_state_slot >= 0) { @@ -104,7 +106,9 @@ int main(int argc, char *argv[]) emu_save_load_game(1, 0); } } + plat_video_menu_end(); } + plat_video_menu_leave(); for (;;) { @@ -132,7 +136,13 @@ int main(int argc, char *argv[]) /* vvv fallthrough */ case PGS_Running: +#ifdef GPERF + ProfilerStart("gperf.out"); +#endif emu_loop(); +#ifdef GPERF + ProfilerStop(); +#endif break; case PGS_Quit: diff --git a/platform/common/memcpy.c b/platform/common/memcpy.c new file mode 100644 index 00000000..1cd74175 --- /dev/null +++ b/platform/common/memcpy.c @@ -0,0 +1,134 @@ +/* + * (C) 2018 Kai-Uwe Bloem + * + * 32bit ARM/MIPS optimized C implementation of memcpy and memove, designed for + * good performance with gcc. + * - if src and dest have the same alignment, 4-word copy is used. + * - if src and dest are unaligned to each other, still loads word data and + * stores correctly shifted word data (for all but the first and last bytes + * to avoid under/overstepping the src region). + * + * ATTN does dirty aliasing tricks with undefined behaviour by standard. + * (however, this improved the generated code). + * ATTN uses struct assignment, which only works if the compiler is inlining + * this (else it would probably call memcpy :-)). 
+ */ +#include +#include + +#include +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define _L_ >> +#define _U_ << +#else +#define _L_ << +#define _U_ >> +#endif + +void *memcpy(void *dest, const void *src, size_t n) +{ + struct _16 { uint32_t a[4]; }; + union { const void *v; uint8_t *c; uint32_t *i; uint64_t *l; struct _16 *s; } + ss = { src }, ds = { dest }; + const int lm = sizeof(uint32_t)-1; + + /* align src to word */ + while (((uintptr_t)ss.c & lm) && n > 0) + *ds.c++ = *ss.c++, n--; + if (((uintptr_t)ds.c & lm) == 0) { + /* fast copy if pointers have the same aligment */ + while (n >= sizeof(struct _16)) /* copy 16 byte blocks */ + *ds.s++ = *ss.s++, n -= sizeof(struct _16); + if (n >= sizeof(uint64_t)) /* copy leftover 8 byte block */ + *ds.l++ = *ss.l++, n -= sizeof(uint64_t); +// if (n >= sizeof(uint32_t)) /* copy leftover 4 byte block */ +// *ds.i++ = *ss.i++, n -= sizeof(uint32_t); + } else if (n >= 2*sizeof(uint32_t)) { + /* unaligned data big enough to avoid overstepping src */ + uint32_t v1, v2, b, s; + /* align dest to word */ + while (((uintptr_t)ds.c & lm) && n > 0) + *ds.c++ = *ss.c++, n--; + /* copy loop: load aligned words and store shifted words */ + b = (uintptr_t)ss.c & lm, s = b*8; ss.c -= b; + v1 = *ss.i++, v2 = *ss.i++; + while (n >= 3*sizeof(uint32_t)) { + *ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s)); v1 = *ss.i++; + *ds.i++ = (v2 _L_ s) | (v1 _U_ (32-s)); v2 = *ss.i++; + n -= 2*sizeof(uint32_t); + } + /* data for one more store is already loaded */ + if (n >= sizeof(uint32_t)) { + *ds.i++ = (v1 _L_ s) | (v2 _U_ (32-s)); + n -= sizeof(uint32_t); + ss.c += sizeof(uint32_t); + } + ss.c += b - 2*sizeof(uint32_t); + } + /* copy 0-7 leftover bytes */ + while (n >= 4) { + *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; + *ds.c++ = *ss.c++, n--; *ds.c++ = *ss.c++, n--; + } + while (n > 0) + *ds.c++ = *ss.c++, n--; + return dest; +} + +void *memmove (void *dest, const void *src, size_t n) +{ + struct _16 { uint32_t a[4]; }; + union { const void *v; uint8_t 
*c; uint32_t *i; uint64_t *l; struct _16 *s; } + ss = { src+n }, ds = { dest+n }; + size_t pd = dest > src ? dest - src : src - dest; + const int lm = sizeof(uint32_t)-1; + + if (dest <= src || dest >= src+n) + return memcpy(dest, src, n); + + /* align src to word */ + while (((uintptr_t)ss.c & lm) && n > 0) + *--ds.c = *--ss.c, n--; + /* take care not to copy multi-byte data if it overlaps */ + if (((uintptr_t)ds.c & lm) == 0) { + /* fast copy if pointers have the same aligment */ + while (n >= sizeof(struct _16) && pd >= sizeof(struct _16)) + /* copy 16 bytes blocks if no overlap */ + *--ds.s = *--ss.s, n -= sizeof(struct _16); + while (n >= sizeof(uint64_t) && pd >= sizeof(uint64_t)) + /* copy leftover 8 byte blocks if no overlap */ + *--ds.l = *--ss.l, n -= sizeof(uint64_t); + while (n >= sizeof(uint32_t) && pd >= sizeof(uint32_t)) + /* copy leftover 4 byte blocks if no overlap */ + *--ds.i = *--ss.i, n -= sizeof(uint32_t); + } else if (n >= 2*sizeof(uint32_t) && pd >= 2*sizeof(uint32_t)) { + /* unaligned data big enough to avoid understepping src */ + uint32_t v1, v2, b, s; + /* align dest to word */ + while (((uintptr_t)ds.c & lm) && n > 0) + *--ds.c = *--ss.c, n--; + /* copy loop: load aligned words and store shifted words */ + b = (uintptr_t)ss.c & lm, s = b*8; ss.c += b; + v1 = *--ss.i, v2 = *--ss.i; + while (n >= 3*sizeof(uint32_t)) { + *--ds.i = (v1 _U_ s) | (v2 _L_ (32-s)); v1 = *--ss.i; + *--ds.i = (v2 _U_ s) | (v1 _L_ (32-s)); v2 = *--ss.i; + n -= 2*sizeof(uint32_t); + } + /* data for one more store is already loaded */ + if (n >= sizeof(uint32_t)) { + *--ds.i = (v1 _U_ s) | (v2 _L_ (32-s)); + n -= sizeof(uint32_t); + ss.c -= sizeof(uint32_t); + } + ss.c -= b - 2*sizeof(uint32_t); + } + /* copy 0-7 leftover bytes (or upto everything if ptrs are too close) */ + while (n >= 4) { + *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; + *--ds.c = *--ss.c, n--; *--ds.c = *--ss.c, n--; + } + while (n > 0) + *--ds.c = *--ss.c, n--; + return dest; +} diff --git 
a/platform/common/menu_pico.c b/platform/common/menu_pico.c index 372fad91..aff2ad85 100644 --- a/platform/common/menu_pico.c +++ b/platform/common/menu_pico.c @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2010,2011 + * (C) irixxxx, 2023,2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -14,22 +15,30 @@ #include "input_pico.h" #include "version.h" -#include + +#include "../libpicofe/plat.h" + +#include #include -#ifdef PANDORA +#if defined(PANDORA) || defined(__PS2__) #define MENU_X2 1 #else #define MENU_X2 0 #endif +#define COL_ROM PXMAKE(0xbf, 0xbf, 0xff) +#define COL_OTH PXMAKE(0xaf, 0xff, 0xaf) + // FIXME +#ifndef REVISION #define REVISION "0" +#endif static const char *rom_exts[] = { - "zip", - "bin", "smd", "gen", "md", - "iso", "cso", "cue", + "zip", "bin", + "pco", "smd", "gen", "md", + "iso", "cso", "cue", "chd", "32x", "sms", NULL @@ -49,48 +58,60 @@ static unsigned short fname2color(const char *fname) } for (i = 0; rom_exts[i] != NULL; i++) - if (strcasecmp(ext, rom_exts[i]) == 0) return 0xbdff; // FIXME: mk defines + if (strcasecmp(ext, rom_exts[i]) == 0) return COL_ROM; for (i = 0; i < array_size(other_exts); i++) - if (strcasecmp(ext, other_exts[i]) == 0) return 0xaff5; - return 0xffff; + if (strcasecmp(ext, other_exts[i]) == 0) return COL_OTH; + return PXMAKE(0xff, 0xff, 0xff); } -#include "../libpicofe/menu.c" +#include static const char *men_dummy[] = { NULL }; +static int menu_w, menu_h; /* platform specific options and handlers */ #if defined(__GP2X__) -#include "../gp2x/menu.c" +#include +#elif defined(__PSP__) +#include +#elif defined(__PS2__) +#include #elif defined(PANDORA) -#include "../pandora/menu.c" +#include #else -#define MENU_OPTIONS_GFX -#define MENU_OPTIONS_ADV +#include #endif -static void make_bg(int no_scale) +static void make_bg(int no_scale, int from_screen) { unsigned short *src = (void *)g_menubg_src_ptr; - int w = g_screen_width, h = g_screen_height; + int w = 
g_menubg_src_w ? g_menubg_src_w : g_screen_width; + int h = g_menubg_src_h ? g_menubg_src_h : g_screen_height; + int pp = g_menubg_src_pp ? g_menubg_src_pp : g_screen_ppitch; short *dst; int x, y; - if (src == NULL) { - memset(g_menubg_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); - return; + if (from_screen) { + src = g_screen_ptr; + w = g_screen_width; + h = g_screen_height; + pp = g_screen_ppitch; } + memset(g_menubg_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); + if (src == NULL) + return; + if (!no_scale && g_menuscreen_w / w >= 2 && g_menuscreen_h / h >= 2) { - unsigned int t, *d = g_menubg_ptr; + u32 t, *d = g_menubg_ptr; d += (g_menuscreen_h / 2 - h * 2 / 2) * g_menuscreen_w / 2; d += (g_menuscreen_w / 2 - w * 2 / 2) / 2; - for (y = 0; y < h; y++, src += w, d += g_menuscreen_w*2/2) { + for (y = 0; y < h; y++, src += pp, d += g_menuscreen_w*2/2) { for (x = 0; x < w; x++) { t = src[x]; - t = ((t & 0xf79e)>>1) - ((t & 0xc618)>>3); + t = (PXMASKH(t,1)>>1) - (PXMASKH(t,3)>>3); t |= t << 16; d[x] = d[x + g_menuscreen_w / 2] = t; } @@ -107,27 +128,74 @@ static void make_bg(int no_scale) (g_menuscreen_w / 2 - w / 2); // darken the active framebuffer - for (; h > 0; dst += g_menuscreen_w, src += g_screen_width, h--) + for (; h > 0; dst += g_menuscreen_w, src += pp, h--) menu_darken_bg(dst, src, w, 1); } -static void menu_enter(int is_rom_loaded) +static void copy_bg(int dir) { - if (is_rom_loaded) - { - make_bg(0); + unsigned short *bg = (void *)g_menubg_ptr; + unsigned short *sc = (void *)g_menuscreen_ptr; + int h = g_menuscreen_h; + + for (; h > 0; sc += g_menuscreen_pp, bg += g_menuscreen_w, h--) { + if (dir) + memcpy(bg, sc, g_menuscreen_w * 2); + else + memcpy(sc, bg, g_menuscreen_w * 2); } +} + +static void menu_draw_prep(void) +{ + if (menu_w == g_menuscreen_w && menu_h == g_menuscreen_h) + return; + menu_w = g_menuscreen_w, menu_h = g_menuscreen_h; + + if (PicoGameLoaded) + { + make_bg(0, 0); + } + else { + int pos; char buff[256]; + pos = 
plat_get_skin_dir(buff, 256); + strcpy(buff + pos, "background.png"); // should really only happen once, on startup.. - emu_make_path(buff, "skin/background.png", sizeof(buff)); - if (readpng(g_menubg_ptr, buff, READPNG_BG, g_menuscreen_w, g_menuscreen_h) < 0) + memset(g_menubg_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); + if (readpng(g_menubg_ptr, buff, READPNG_BG, + g_menuscreen_w, g_menuscreen_h) < 0) memset(g_menubg_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); } +} - plat_video_menu_enter(is_rom_loaded); +static void menu_draw_prep_selector(void) +{ + if (menu_w == g_menuscreen_w && menu_h == g_menuscreen_h) + return; + menu_w = g_menuscreen_w, menu_h = g_menuscreen_h; + + if (PicoGameLoaded) + { + make_bg(0, 0); + } + + else + { + int pos; + char buff[256]; + pos = plat_get_skin_dir(buff, 256); + strcpy(buff + pos, "background_selector.png"); + + // should really only happen once, on startup.. + memset(g_menubg_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); + if (readpng(g_menubg_ptr, buff, READPNG_BG, + g_menuscreen_w, g_menuscreen_h) < 0) + memset(g_menubg_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); + } } static void draw_savestate_bg(int slot) @@ -146,11 +214,18 @@ static void draw_savestate_bg(int slot) /* do a frame and fetch menu bg */ pemu_forced_frame(0, 0); - make_bg(0); + make_bg(0, 1); PicoTmpStateRestore(tmp_state); } +static void menu_enter(int is_rom_loaded) +{ + plat_video_menu_enter(is_rom_loaded); + menu_w = menu_h = 0; + menu_draw_prep(); +} + // --------- loading ROM screen ---------- static int cdload_called = 0; @@ -164,8 +239,9 @@ static void load_progress_cb(int percent) len = g_menuscreen_w; menu_draw_begin(0, 1); - dst = (unsigned short *)g_menuscreen_ptr + g_menuscreen_w * me_sfont_h * 2; - for (ln = me_sfont_h - 2; ln > 0; ln--, dst += g_menuscreen_w) + copy_bg(0); + dst = (unsigned short *)g_menuscreen_ptr + g_menuscreen_pp * me_sfont_h * 2; + for (ln = me_sfont_h - 2; ln > 0; ln--, dst += g_menuscreen_pp) memset(dst, 0xff, len 
* 2); menu_draw_end(); } @@ -176,17 +252,19 @@ static void cdload_progress_cb(const char *fname, int percent) unsigned short *dst; menu_draw_begin(0, 1); - dst = (unsigned short *)g_menuscreen_ptr + g_menuscreen_w * me_sfont_h * 2; - memset(dst, 0xff, g_menuscreen_w * (me_sfont_h - 2) * 2); + dst = (unsigned short *)g_menuscreen_ptr + g_menuscreen_pp * me_sfont_h * 2; - smalltext_out16(1, 3 * me_sfont_h, "Processing CD image / MP3s", 0xffff); - smalltext_out16(1, 4 * me_sfont_h, fname, 0xffff); - dst += g_menuscreen_w * me_sfont_h * 3; + copy_bg(0); + menuscreen_memset_lines(dst, 0xff, me_sfont_h - 2); + + smalltext_out16(1, 3 * me_sfont_h, "Processing CD image / MP3s", PXMAKE(0xff, 0xff, 0xff)); + smalltext_out16(1, 4 * me_sfont_h, fname, PXMAKE(0xff, 0xff, 0xff)); + dst += g_menuscreen_pp * me_sfont_h * 3; if (len > g_menuscreen_w) len = g_menuscreen_w; - for (ln = (me_sfont_h - 2); ln > 0; ln--, dst += g_menuscreen_w) + for (ln = (me_sfont_h - 2); ln > 0; ln--, dst += g_menuscreen_pp) memset(dst, 0xff, len * 2); menu_draw_end(); @@ -196,18 +274,16 @@ static void cdload_progress_cb(const char *fname, int percent) void menu_romload_prepare(const char *rom_name) { const char *p = rom_name + strlen(rom_name); - int i; while (p > rom_name && *p != '/') p--; - /* fill all buffers, callbacks won't update in full */ - for (i = 0; i < 3; i++) { - menu_draw_begin(1, 1); - smalltext_out16(1, 1, "Loading", 0xffff); - smalltext_out16(1, me_sfont_h, p, 0xffff); - menu_draw_end(); - } + menu_draw_begin(1, 1); + smalltext_out16(1, 1, "Loading", PXMAKE(0xff, 0xff, 0xff)); + smalltext_out16(1, me_sfont_h, p, PXMAKE(0xff, 0xff, 0xff)); + /* copy menu to bg for callbacks. OK since we are not in menu_loop here */ + copy_bg(1); + menu_draw_end(); PicoCartLoadProgressCB = load_progress_cb; PicoCDLoadProgressCB = cdload_progress_cb; @@ -220,8 +296,9 @@ void menu_romload_end(void) PicoCDLoadProgressCB = NULL; menu_draw_begin(0, 1); + copy_bg(0); smalltext_out16(1, (cdload_called ? 
6 : 3) * me_sfont_h, - "Starting emulation...", 0xffff); + "Starting emulation...", PXMAKE(0xff, 0xff, 0xff)); menu_draw_end(); } @@ -241,12 +318,12 @@ static void draw_patchlist(int sel) if (pos < 0) continue; if (pos >= max_cnt) break; active = PicoPatches[i].active; - smalltext_out16(14, pos * me_sfont_h, active ? "ON " : "OFF", active ? 0xfff6 : 0xffff); - smalltext_out16(14 + me_sfont_w*4, pos * me_sfont_h, PicoPatches[i].name, active ? 0xfff6 : 0xffff); + smalltext_out16(14, pos * me_sfont_h, active ? "ON " : "OFF", PXMAKE(0xff, 0xff, active ? 0xff : 0xb0)); + smalltext_out16(14 + me_sfont_w*4, pos * me_sfont_h, PicoPatches[i].name, PXMAKE(0xff, 0xff, active ? 0xff : 0xb0)); } pos = start + i; if (pos < max_cnt) - smalltext_out16(14, pos * me_sfont_h, "done", 0xffff); + smalltext_out16(14, pos * me_sfont_h, "done", PXMAKE(0xff, 0xff, 0xff)); text_out16(5, max_cnt / 2 * me_sfont_h, ">"); menu_draw_end(); @@ -278,7 +355,7 @@ static void menu_loop_patches(void) // -------------- key config -------------- -// PicoPad[] format: MXYZ SACB RLDU +// PicoIn.pad[] format: MXYZ SACB RLDU me_bind_action me_ctrl_actions[] = { { "UP ", 0x0001 }, @@ -301,18 +378,21 @@ me_bind_action me_ctrl_actions[] = me_bind_action emuctrl_actions[] = { - { "Load State ", PEV_STATE_LOAD }, - { "Save State ", PEV_STATE_SAVE }, - { "Prev Save Slot ", PEV_SSLOT_PREV }, - { "Next Save Slot ", PEV_SSLOT_NEXT }, - { "Switch Renderer ", PEV_SWITCH_RND }, - { "Volume Down ", PEV_VOL_DOWN }, - { "Volume Up ", PEV_VOL_UP }, - { "Fast forward ", PEV_FF }, - { "Enter Menu ", PEV_MENU }, - { "Pico Next page ", PEV_PICO_PNEXT }, - { "Pico Prev page ", PEV_PICO_PPREV }, - { "Pico Switch input", PEV_PICO_SWINP }, + { "Load State ", PEV_STATE_LOAD }, + { "Save State ", PEV_STATE_SAVE }, + { "Prev Save Slot ", PEV_SSLOT_PREV }, + { "Next Save Slot ", PEV_SSLOT_NEXT }, + { "Switch Renderer", PEV_SWITCH_RND }, + { "Volume Down ", PEV_VOL_DOWN }, + { "Volume Up ", PEV_VOL_UP }, + { "Fast forward ", PEV_FF }, 
+ { "Reset Game ", PEV_RESET }, + { "Enter Menu ", PEV_MENU }, + { "Pico Next page ", PEV_PICO_PNEXT }, + { "Pico Prev page ", PEV_PICO_PPREV }, + { "Pico Storyware ", PEV_PICO_STORY }, + { "Pico Pad ", PEV_PICO_PAD }, + { "Pico Pen state ", PEV_PICO_PENST }, { NULL, 0 } }; @@ -325,6 +405,12 @@ static int key_config_loop_wrap(int id, int keys) case MA_CTRL_PLAYER2: key_config_loop(me_ctrl_actions, array_size(me_ctrl_actions) - 1, 1); break; + case MA_CTRL_PLAYER3: + key_config_loop(me_ctrl_actions, array_size(me_ctrl_actions) - 1, 2); + break; + case MA_CTRL_PLAYER4: + key_config_loop(me_ctrl_actions, array_size(me_ctrl_actions) - 1, 3); + break; case MA_CTRL_EMU: key_config_loop(emuctrl_actions, array_size(emuctrl_actions) - 1, -1); break; @@ -352,22 +438,23 @@ static const char *mgn_dev_name(int id, int *offs) return name; } -static int mh_saveloadcfg(int id, int keys); -static const char *mgn_saveloadcfg(int id, int *offs); +const char *indev0_names[] = { "none", "3 button pad", "6 button pad", "Team player", "4 way play", NULL }; +const char *indev1_names[] = { "none", "3 button pad", "6 button pad", NULL }; -const char *indev_names[] = { "none", "3 button pad", "6 button pad", NULL }; +static char h_play34[] = "Works only for Mega Drive/CD/32X games having\n" + "support for Team player or 4 way play"; static menu_entry e_menu_keyconfig[] = { mee_handler_id("Player 1", MA_CTRL_PLAYER1, key_config_loop_wrap), mee_handler_id("Player 2", MA_CTRL_PLAYER2, key_config_loop_wrap), - mee_handler_id("Emulator controls", MA_CTRL_EMU, key_config_loop_wrap), - mee_enum ("Input device 1", MA_OPT_INPUT_DEV0, currentConfig.input_dev0, indev_names), - mee_enum ("Input device 2", MA_OPT_INPUT_DEV1, currentConfig.input_dev1, indev_names), + mee_handler_id_h("Player 3", MA_CTRL_PLAYER3, key_config_loop_wrap, h_play34), + mee_handler_id_h("Player 4", MA_CTRL_PLAYER4, key_config_loop_wrap, h_play34), + mee_handler_id("Emulator hotkeys", MA_CTRL_EMU, key_config_loop_wrap), + 
mee_enum ("Input device 1", MA_OPT_INPUT_DEV0, currentConfig.input_dev0, indev0_names), + mee_enum ("Input device 2", MA_OPT_INPUT_DEV1, currentConfig.input_dev1, indev1_names), mee_range ("Turbo rate", MA_CTRL_TURBO_RATE, currentConfig.turbo_rate, 1, 30), mee_range ("Analog deadzone", MA_CTRL_DEADZONE, currentConfig.analog_deadzone, 1, 99), - mee_cust_nosave("Save global config", MA_OPT_SAVECFG, mh_saveloadcfg, mgn_saveloadcfg), - mee_cust_nosave("Save cfg for loaded game", MA_OPT_SAVECFG_GAME, mh_saveloadcfg, mgn_saveloadcfg), mee_label (""), mee_label ("Input devices:"), mee_label_mk (MA_CTRL_DEV_FIRST, mgn_dev_name), @@ -383,9 +470,14 @@ static menu_entry e_menu_keyconfig[] = static int menu_loop_keyconfig(int id, int keys) { static int sel = 0; + int it = 0, x = me_id2offset(e_menu_keyconfig, MA_CTRL_DEV_FIRST); - me_enable(e_menu_keyconfig, MA_OPT_SAVECFG_GAME, PicoGameLoaded); - me_loop(e_menu_keyconfig, &sel); + while (in_get_dev_name(it, 1, 1)) + it++; + for (it += x; x && e_menu_keyconfig[x].name; x++) + e_menu_keyconfig[x].enabled = x < it; + + me_loop_d(e_menu_keyconfig, &sel, menu_draw_prep, NULL); PicoSetInputDevice(0, currentConfig.input_dev0); PicoSetInputDevice(1, currentConfig.input_dev1); @@ -393,58 +485,58 @@ static int menu_loop_keyconfig(int id, int keys) return 0; } -// ------------ SCD options menu ------------ +// ------------ MD options menu ------------ -static const char *mgn_cdopt_ra(int id, int *offs) -{ - *offs = -5; - if (PicoCDBuffers <= 0) - return " OFF"; - sprintf(static_buff, "%5iK", PicoCDBuffers * 2); - return static_buff; -} +static const char h_renderer[] = "16bit is more accurate, 8bit is faster"; +static const char h_fmsound[] = "Disabling improves performance, but breaks sound"; +static const char h_dacnoise[] = "FM chips in the 1st Mega Drive model have DAC noise,\n" + "newer models used different chips without this"; +static const char h_fmfilter[] = "Improves sound accuracy but is noticeably slower,\n" + "best quality 
if native rate isn't working"; +static const char h_picopen[] = "Enabling resets Pico display and d-pad input back to\n" + "screen if the Pico pen button is pressed"; -static int mh_cdopt_ra(int id, int keys) +static menu_entry e_menu_md_options[] = { - if (keys & PBTN_LEFT) { - PicoCDBuffers >>= 1; - if (PicoCDBuffers < 2) - PicoCDBuffers = 0; - } else { - if (PicoCDBuffers <= 0) - PicoCDBuffers = 1; - PicoCDBuffers <<= 1; - if (PicoCDBuffers > 8*1024) - PicoCDBuffers = 8*1024; // 16M - } + mee_enum_h ("Renderer", MA_OPT_RENDERER, currentConfig.renderer, renderer_names, h_renderer), + mee_onoff_h ("FM audio", MA_OPT2_ENABLE_YM2612, PicoIn.opt, POPT_EN_FM, h_fmsound), + mee_onoff_h ("FM filter", MA_OPT_FM_FILTER, PicoIn.opt, POPT_EN_FM_FILTER, h_fmfilter), + mee_onoff_h ("FM DAC noise", MA_OPT2_ENABLE_YM_DAC, PicoIn.opt, POPT_EN_FM_DAC, h_dacnoise), + mee_onoff_h ("Pen button shows screen", MA_OPT_PICO_PEN, currentConfig.EmuOpt, EOPT_PICO_PEN, h_picopen), + mee_end, +}; + +static int menu_loop_md_options(int id, int keys) +{ + static int sel = 0; + if (renderer_names[0] == NULL) + me_enable(e_menu_md_options, MA_OPT_RENDERER, 0); + me_loop_d(e_menu_md_options, &sel, menu_draw_prep, NULL); + return 0; } +// ------------ SCD options menu ------------ + static const char h_cdleds[] = "Show power/CD LEDs of emulated console"; static const char h_cdda[] = "Play audio tracks from mp3s/wavs/bins"; static const char h_cdpcm[] = "Emulate PCM audio chip for effects/voices/music"; static const char h_srcart[] = "Emulate the save RAM cartridge accessory\n" "most games don't need this"; -static const char h_scfx[] = "Emulate scale/rotate ASIC chip for graphics effects\n" - "disable to improve performance"; -static const char h_bsync[] = "More accurate mode for CPUs (needed for some games)\n" - "disable to improve performance"; static menu_entry e_menu_cd_options[] = { + mee_onoff_h("SaveRAM cart", MA_CDOPT_SAVERAM, PicoIn.opt, POPT_EN_MCD_RAMCART, h_srcart), mee_onoff_h("CD 
LEDs", MA_CDOPT_LEDS, currentConfig.EmuOpt, EOPT_EN_CD_LEDS, h_cdleds), - mee_onoff_h("CDDA audio", MA_CDOPT_CDDA, PicoOpt, POPT_EN_MCD_CDDA, h_cdda), - mee_onoff_h("PCM audio", MA_CDOPT_PCM, PicoOpt, POPT_EN_MCD_PCM, h_cdpcm), - mee_cust ("ReadAhead buffer", MA_CDOPT_READAHEAD, mh_cdopt_ra, mgn_cdopt_ra), - mee_onoff_h("SaveRAM cart", MA_CDOPT_SAVERAM, PicoOpt, POPT_EN_MCD_RAMCART, h_srcart), - mee_onoff_h("Scale/Rot. fx (slow)", MA_CDOPT_SCALEROT_CHIP, PicoOpt, POPT_EN_MCD_GFX, h_scfx), + mee_onoff_h("CDDA audio", MA_CDOPT_CDDA, PicoIn.opt, POPT_EN_MCD_CDDA, h_cdda), + mee_onoff_h("PCM audio", MA_CDOPT_PCM, PicoIn.opt, POPT_EN_MCD_PCM, h_cdpcm), mee_end, }; static int menu_loop_cd_options(int id, int keys) { static int sel = 0; - me_loop(e_menu_cd_options, &sel); + me_loop_d(e_menu_cd_options, &sel, menu_draw_prep, NULL); return 0; } @@ -480,19 +572,14 @@ static const char *mgn_opt_sh2cycles(int id, int *offs) return static_buff; } -static const char h_32x_enable[] = "Enable emulation of the 32X addon"; static const char h_pwm[] = "Disabling may improve performance, but break sound"; -static const char h_sh2cycles[] = "Cycles/millisecond (similar to DOSBox)\n" - "lower values speed up emulation but break games\n" - "at least 11000 recommended for compatibility"; +static const char h_pwmopt[] = "Enabling may improve performance, but break sound"; static menu_entry e_menu_32x_options[] = { - mee_onoff_h ("32X enabled", MA_32XOPT_ENABLE_32X, PicoOpt, POPT_EN_32X, h_32x_enable), mee_enum ("32X renderer", MA_32XOPT_RENDERER, currentConfig.renderer32x, renderer_names32x), - mee_onoff_h ("PWM sound", MA_32XOPT_PWM, PicoOpt, POPT_EN_PWM, h_pwm), - mee_cust_h ("Master SH2 cycles", MA_32XOPT_MSH2_CYCLES, mh_opt_sh2cycles, mgn_opt_sh2cycles, h_sh2cycles), - mee_cust_h ("Slave SH2 cycles", MA_32XOPT_SSH2_CYCLES, mh_opt_sh2cycles, mgn_opt_sh2cycles, h_sh2cycles), + mee_onoff_h ("PWM audio", MA_32XOPT_PWM, PicoIn.opt, POPT_EN_PWM, h_pwm), + mee_onoff_h ("PWM IRQ optimization", 
MA_OPT2_PWM_IRQ_OPT, PicoIn.opt, POPT_PWM_IRQ_OPT, h_pwmopt), mee_end, }; @@ -500,8 +587,9 @@ static int menu_loop_32x_options(int id, int keys) { static int sel = 0; - me_enable(e_menu_32x_options, MA_32XOPT_RENDERER, renderer_names32x[0] != NULL); - me_loop(e_menu_32x_options, &sel); + if (renderer_names32x[0] == NULL) + me_enable(e_menu_32x_options, MA_32XOPT_RENDERER, 0); + me_loop_d(e_menu_32x_options, &sel, menu_draw_prep, NULL); Pico32xSetClocks(currentConfig.msh2_khz * 1000, currentConfig.msh2_khz * 1000); @@ -510,21 +598,61 @@ static int menu_loop_32x_options(int id, int keys) #endif +// ------------ SMS options menu ------------ + +#ifndef NO_SMS + +static const char *sms_hardwares[] = { "auto", "Game Gear", "Master System", "SG-1000", "SC-3000", NULL }; +static const char *gg_ghosting_opts[] = { "OFF", "weak", "normal", NULL }; +static const char *sms_mappers[] = { "auto", "Sega", "Codemasters", "Korea", "Korea MSX", "Korea X-in-1", "Korea 4-Pak", "Korea Janggun", "Korea Nemesis", "Taiwan 8K RAM", "Korea XOR", "Sega 32K RAM", NULL }; +static const char *sms_tmspalette[] = { "SMS", "SG-1000", NULL }; + +static const char h_smsfm[] = "FM sound is only supported by few games,\n" + "some games may crash with FM enabled"; +static const char h_ghost[] = "Simulate the inertia of the GG LCD display"; +static const char h_smspal[] = "Selects the color palette used for SMS games\n" + "using the original TMS9918 graphics modes"; + +static menu_entry e_menu_sms_options[] = +{ + mee_enum ("System", MA_SMSOPT_HARDWARE, PicoIn.hwSelect, sms_hardwares), + mee_enum ("Cartridge mapping", MA_SMSOPT_MAPPER, PicoIn.mapper, sms_mappers), + mee_enum_h ("Game Gear LCD ghosting", MA_SMSOPT_GHOSTING, currentConfig.ghosting, gg_ghosting_opts, h_ghost), + mee_onoff_h ("FM Sound Unit", MA_OPT2_ENABLE_YM2413, PicoIn.opt, POPT_EN_YM2413, h_smsfm), + mee_enum_h ("SMS palette in TMS mode", MA_SMSOPT_TMSPALETTE, PicoIn.tmsPalette, sms_tmspalette, h_smspal), + mee_end, +}; + +static int 
menu_loop_sms_options(int id, int keys) +{ + static int sel = 0; + + me_loop_d(e_menu_sms_options, &sel, menu_draw_prep, NULL); + + return 0; +} + +#endif + // ------------ adv options menu ------------ +static const char h_gglcd[] = "Show full VDP image with borders if disabled"; +static const char h_ovrclk[] = "Will break some games, keep at 0"; +static const char h_dynarec[] = "Disabling dynarecs massively slows down 32X"; +static const char h_sh2cycles[] = "Cycles/millisecond (similar to DOSBox)\n" + "lower values speed up emulation but break games\n" + "at least 11000 recommended for compatibility"; + static menu_entry e_menu_adv_options[] = { - mee_onoff ("SRAM/BRAM saves", MA_OPT_SRAM_STATES, currentConfig.EmuOpt, EOPT_EN_SRAM), - mee_onoff ("Disable sprite limit", MA_OPT2_NO_SPRITE_LIM, PicoOpt, POPT_DIS_SPRITE_LIM), - mee_onoff ("Emulate Z80", MA_OPT2_ENABLE_Z80, PicoOpt, POPT_EN_Z80), - mee_onoff ("Emulate YM2612 (FM)", MA_OPT2_ENABLE_YM2612, PicoOpt, POPT_EN_FM), - mee_onoff ("Emulate SN76496 (PSG)", MA_OPT2_ENABLE_SN76496,PicoOpt, POPT_EN_PSG), - mee_onoff ("gzip savestates", MA_OPT2_GZIP_STATES, currentConfig.EmuOpt, EOPT_GZIP_SAVES), - mee_onoff ("Don't save last used ROM", MA_OPT2_NO_LAST_ROM, currentConfig.EmuOpt, EOPT_NO_AUTOSVCFG), - mee_onoff ("Disable idle loop patching",MA_OPT2_NO_IDLE_LOOPS,PicoOpt, POPT_DIS_IDLE_DET), mee_onoff ("Disable frame limiter", MA_OPT2_NO_FRAME_LIMIT,currentConfig.EmuOpt, EOPT_NO_FRMLIMIT), - mee_onoff ("Enable dynarecs", MA_OPT2_DYNARECS, PicoOpt, POPT_EN_DRC), - mee_onoff ("Status line in main menu", MA_OPT2_STATUS_LINE, currentConfig.EmuOpt, EOPT_SHOW_RTC), + mee_onoff ("Disable sprite limit", MA_OPT2_NO_SPRITE_LIM, PicoIn.opt, POPT_DIS_SPRITE_LIM), + mee_onoff ("Disable idle loop patching",MA_OPT2_NO_IDLE_LOOPS,PicoIn.opt, POPT_DIS_IDLE_DET), + mee_onoff_h ("Emulate Game Gear LCD", MA_OPT2_ENABLE_GGLCD ,PicoIn.opt, POPT_EN_GG_LCD, h_gglcd), + mee_range_h ("Overclock M68k (%)", 
MA_OPT2_OVERCLOCK_M68K,currentConfig.overclock_68k, 0, 1000, h_ovrclk), + mee_onoff_h ("Enable dynarecs", MA_OPT2_DYNARECS, PicoIn.opt, POPT_EN_DRC, h_dynarec), + mee_cust_h ("Master SH2 cycles", MA_32XOPT_MSH2_CYCLES, mh_opt_sh2cycles, mgn_opt_sh2cycles, h_sh2cycles), + mee_cust_h ("Slave SH2 cycles", MA_32XOPT_SSH2_CYCLES, mh_opt_sh2cycles, mgn_opt_sh2cycles, h_sh2cycles), MENU_OPTIONS_ADV mee_end, }; @@ -532,153 +660,110 @@ static menu_entry e_menu_adv_options[] = static int menu_loop_adv_options(int id, int keys) { static int sel = 0; - me_loop(e_menu_adv_options, &sel); - return 0; -} -// ------------ gfx options menu ------------ - -static const char h_gamma[] = "Gamma/brightness adjustment (default 1.00)"; - -static const char *mgn_aopt_gamma(int id, int *offs) -{ - sprintf(static_buff, "%i.%02i", currentConfig.gamma / 100, currentConfig.gamma % 100); - return static_buff; -} - -static menu_entry e_menu_gfx_options[] = -{ - mee_enum ("Video output mode", MA_OPT_VOUT_MODE, plat_target.vout_method, men_dummy), - mee_enum ("Renderer", MA_OPT_RENDERER, currentConfig.renderer, renderer_names), - mee_enum ("Filter", MA_OPT3_FILTERING, currentConfig.filter, men_dummy), - mee_range_cust_h("Gamma correction", MA_OPT2_GAMMA, currentConfig.gamma, 1, 300, mgn_aopt_gamma, h_gamma), - MENU_OPTIONS_GFX - mee_end, -}; - -static int menu_loop_gfx_options(int id, int keys) -{ - static int sel = 0; - - me_enable(e_menu_gfx_options, MA_OPT_RENDERER, renderer_names[0] != NULL); - me_loop(e_menu_gfx_options, &sel); + me_loop_d(e_menu_adv_options, &sel, menu_draw_prep, NULL); + PicoIn.overclockM68k = currentConfig.overclock_68k; // int vs short return 0; } -// ------------ options menu ------------ - -static menu_entry e_menu_options[]; +// ------------ sound options menu ------------ static int sndrate_prevnext(int rate, int dir) { - static const int rates[] = { 8000, 11025, 16000, 22050, 44100 }; + const int *rates = plat_target.sound_rates; + int rate_count; int i; - for (i = 
0; i < 5; i++) + for (rate_count = 0; rates[rate_count] != -1; rate_count++) + ; + for (i = 0; i < rate_count; i++) if (rates[i] == rate) break; i += dir ? 1 : -1; - if (i > 4) { - if (!(PicoOpt & POPT_EN_STEREO)) { - PicoOpt |= POPT_EN_STEREO; + if (i >= rate_count) { + if (!(PicoIn.opt & POPT_EN_STEREO)) { + PicoIn.opt |= POPT_EN_STEREO; return rates[0]; } - return rates[4]; + return rates[rate_count-1]; } if (i < 0) { - if (PicoOpt & POPT_EN_STEREO) { - PicoOpt &= ~POPT_EN_STEREO; - return rates[4]; + if (PicoIn.opt & POPT_EN_STEREO) { + PicoIn.opt &= ~POPT_EN_STEREO; + return rates[rate_count-1]; } return rates[0]; } return rates[i]; } -static void region_prevnext(int right) +static int mh_opt_snd(int id, int keys) { - // jp_ntsc=1, jp_pal=2, usa=4, eu=8 - static const int rgn_orders[] = { 0x148, 0x184, 0x814, 0x418, 0x841, 0x481 }; - int i; - - if (right) { - if (!PicoRegionOverride) { - for (i = 0; i < 6; i++) - if (rgn_orders[i] == PicoAutoRgnOrder) break; - if (i < 5) PicoAutoRgnOrder = rgn_orders[i+1]; - else PicoRegionOverride=1; - } - else - PicoRegionOverride <<= 1; - if (PicoRegionOverride > 8) - PicoRegionOverride = 8; - } else { - if (!PicoRegionOverride) { - for (i = 0; i < 6; i++) - if (rgn_orders[i] == PicoAutoRgnOrder) break; - if (i > 0) PicoAutoRgnOrder = rgn_orders[i-1]; - } - else - PicoRegionOverride >>= 1; - } -} - -static int mh_opt_misc(int id, int keys) -{ - switch (id) { - case MA_OPT_SOUND_QUALITY: - PsndRate = sndrate_prevnext(PsndRate, keys & PBTN_RIGHT); - break; - case MA_OPT_REGION: - region_prevnext(keys & PBTN_RIGHT); - break; - default: - break; - } + PicoIn.sndRate = sndrate_prevnext(PicoIn.sndRate, keys & PBTN_RIGHT); return 0; } -static int mh_saveloadcfg(int id, int keys) +static const char *mgn_opt_sound(int id, int *offs) { - int ret; - - if (keys & (PBTN_LEFT|PBTN_RIGHT)) { // multi choice - config_slot += (keys & PBTN_LEFT) ? 
-1 : 1; - if (config_slot < 0) config_slot = 9; - else if (config_slot > 9) config_slot = 0; - me_enable(e_menu_options, MA_OPT_LOADCFG, config_slot != config_slot_current); - return 0; - } - - switch (id) { - case MA_OPT_SAVECFG: - case MA_OPT_SAVECFG_GAME: - if (emu_write_config(id == MA_OPT_SAVECFG_GAME ? 1 : 0)) - menu_update_msg("config saved"); - else - menu_update_msg("failed to write config"); - break; - case MA_OPT_LOADCFG: - ret = emu_read_config(rom_fname_loaded, 1); - if (!ret) ret = emu_read_config(NULL, 1); - if (ret) menu_update_msg("config loaded"); - else menu_update_msg("failed to load config"); - break; - default: - return 0; - } - - return 1; + const char *str2; + *offs = -8; + str2 = (PicoIn.opt & POPT_EN_STEREO) ? "stereo" : "mono"; + if (PicoIn.sndRate > 52000 && PicoIn.sndRate < 54000) + sprintf(static_buff, "native %s", str2); + else sprintf(static_buff, "%5iHz %s", PicoIn.sndRate, str2); + return static_buff; } -static int mh_restore_defaults(int id, int keys) +static int mh_opt_alpha(int id, int keys) { - emu_set_defconfig(); - menu_update_msg("defaults restored"); - return 1; + int val = (PicoIn.sndFilterAlpha * 100 + 0x08000) / 0x10000; + if (keys & PBTN_LEFT) val--; + if (keys & PBTN_RIGHT) val++; + if (val < 1) val = 1; + if (val > 99) val = 99; + PicoIn.sndFilterAlpha = val * 0x10000 / 100; + return 0; } +static const char *mgn_opt_alpha(int id, int *offs) +{ + int val = (PicoIn.sndFilterAlpha * 100 + 0x08000) / 0x10000; + sprintf(static_buff, "0.%02d", val); + return static_buff; +} + +static const char h_ensound[] = "Disabling turns off sound output, however all\n" + "enabled sound components are still emulated"; +static const char h_quality[] = "native: Mega Drive FM hardware rate (~53000Hz),\n" + "best quality, but may not work on some devices"; +static const char h_lowpass[] = "Low pass filter for sound closer to real hardware"; +static const char h_lpalpha[] = "Higher values have more impact"; + +static menu_entry 
e_menu_snd_options[] = +{ + mee_onoff_h ("Enable sound", MA_OPT_ENABLE_SOUND, currentConfig.EmuOpt, EOPT_EN_SOUND, h_ensound), + mee_cust_h ("Sound quality", MA_OPT_SOUND_QUALITY, mh_opt_snd, mgn_opt_sound, h_quality), + mee_onoff_h ("Sound filter", MA_OPT_SOUND_FILTER, PicoIn.opt, POPT_EN_SNDFILTER, h_lowpass), + mee_cust_h ("Filter strength", MA_OPT_SOUND_ALPHA, mh_opt_alpha, mgn_opt_alpha, h_lpalpha), + mee_end, +}; + +static int menu_loop_snd_options(int id, int keys) +{ + static int sel = 0; + + if (PicoIn.sndRate > 52000 && PicoIn.sndRate < 54000) + PicoIn.sndRate = 53000; + me_loop_d(e_menu_snd_options, &sel, menu_draw_prep, NULL); + + return 0; +} + +// ------------ gfx options menu ------------ + +static const char h_gamma[] = "Gamma/brightness adjustment (default 1.00)"; + static const char *mgn_opt_fskip(int id, int *offs) { if (currentConfig.Frameskip < 0) @@ -687,20 +772,177 @@ static const char *mgn_opt_fskip(int id, int *offs) return static_buff; } -static const char *mgn_opt_sound(int id, int *offs) +static const char *mgn_aopt_gamma(int id, int *offs) { - const char *str2; - *offs = -8; - str2 = (PicoOpt & POPT_EN_STEREO) ? 
"stereo" : "mono"; - sprintf(static_buff, "%5iHz %s", PsndRate, str2); + sprintf(static_buff, "%i.%02i", currentConfig.gamma / 100, currentConfig.gamma % 100); return static_buff; } +static menu_entry e_menu_gfx_options[] = +{ + mee_enum ("Video output mode", MA_OPT_VOUT_MODE, plat_target.vout_method, men_dummy), + mee_range_cust("Frameskip", MA_OPT_FRAMESKIP, currentConfig.Frameskip, -1, 16, mgn_opt_fskip), + mee_range ("Max auto frameskip",MA_OPT2_MAX_FRAMESKIP, currentConfig.max_skip, 1, 10), + mee_enum ("Filter", MA_OPT3_FILTERING, currentConfig.filter, men_dummy), + mee_range_cust_h("Gamma correction",MA_OPT2_GAMMA, currentConfig.gamma, 1, 300, mgn_aopt_gamma, h_gamma), + MENU_OPTIONS_GFX + mee_end, +}; + +static int menu_loop_gfx_options(int id, int keys) +{ + static int sel = 0; + + me_loop_d(e_menu_gfx_options, &sel, menu_draw_prep, NULL); + + return 0; +} + +// ------------ UI options menu ------------ + +static const char *men_confirm_save[] = { "OFF", "writes", "loads", "both", NULL }; +static const char h_confirm_save[] = "Ask for confirmation when overwriting save,\n" + "loading state or both"; + +static menu_entry e_menu_ui_options[] = +{ + mee_onoff ("Show FPS", MA_OPT_SHOW_FPS, currentConfig.EmuOpt, EOPT_SHOW_FPS), + mee_enum_h ("Confirm save/load", MA_OPT_CONFIRM_STATES, currentConfig.confirm_save, men_confirm_save, h_confirm_save), + mee_onoff ("Don't save last used game", MA_OPT2_NO_LAST_ROM, currentConfig.EmuOpt, EOPT_NO_AUTOSVCFG), + mee_end, +}; + +static int menu_loop_ui_options(int id, int keys) +{ + static int sel = 0; + + me_loop_d(e_menu_ui_options, &sel, menu_draw_prep, NULL); + + return 0; +} + +// ------------ options menu ------------ + +static int find_renderer(const char *names[], const char *which) +{ + int i = 0; + for (i = 0; *names; names++, i++) + if (strstr(*names, which)) return i; + return 0; +} + +static int mh_profile(int id, int keys) { + switch (id) { + case MA_PROFILE_ACCURATE: + currentConfig.renderer = 
find_renderer(renderer_names, "16bit"); + currentConfig.renderer32x = find_renderer(renderer_names32x, "accurate"); + PicoIn.sndRate = 44100; + PicoIn.opt |= POPT_EN_FM_FILTER | POPT_EN_FM | POPT_EN_MCD_CDDA; + PicoIn.opt &= ~POPT_PWM_IRQ_OPT; + break; + case MA_PROFILE_BALANCED: + currentConfig.renderer = find_renderer(renderer_names, "8bit"); + currentConfig.renderer32x = find_renderer(renderer_names32x, "fast"); + PicoIn.sndRate = 44100; + PicoIn.opt |= POPT_EN_FM | POPT_EN_MCD_CDDA; + PicoIn.opt &= ~(POPT_PWM_IRQ_OPT | POPT_EN_FM_FILTER); + break; + case MA_PROFILE_FAST: + currentConfig.renderer = find_renderer(renderer_names, "fast"); + currentConfig.renderer32x = find_renderer(renderer_names32x, "fastest"); + PicoIn.sndRate = 22050; + PicoIn.opt |= POPT_PWM_IRQ_OPT | POPT_EN_FM | POPT_EN_MCD_CDDA; + PicoIn.opt &= ~POPT_EN_FM_FILTER; + break; + case MA_PROFILE_BREAKING: + currentConfig.renderer = find_renderer(renderer_names, "fast"); + currentConfig.renderer32x = find_renderer(renderer_names32x, "fastest"); + PicoIn.sndRate = 16000; + PicoIn.opt |= POPT_PWM_IRQ_OPT; + PicoIn.opt &= ~(POPT_EN_FM_FILTER | POPT_EN_FM | POPT_EN_MCD_CDDA); + break; + } + return 1; +} + +static menu_entry e_menu_profile[] = +{ + mee_label ("Select option profile and press OK:"), + mee_handler_id("accurate", MA_PROFILE_ACCURATE, mh_profile), + mee_handler_id("balanced", MA_PROFILE_BALANCED, mh_profile), + mee_handler_id("fast", MA_PROFILE_FAST, mh_profile), + mee_handler_id("breaking", MA_PROFILE_BREAKING, mh_profile), + mee_label (""), + mee_label ("Options changed by Option profiles:"), + mee_label (""), + mee_label ("Sound: Sound quality"), + mee_label ("MD: Renderer, FM audio, FM filter"), + mee_label ("32X: Renderer, PWM IRQ optimization"), + mee_label ("CD: CDDA audio"), + mee_end, +}; + +static int menu_loop_profile_options(int id, int keys) +{ + static int sel = 0; + + me_loop_d(e_menu_profile, &sel, menu_draw_prep, NULL); + + return 0; +} + +static void region_prevnext(int 
right) +{ + // jp_ntsc=1, jp_pal=2, usa=4, eu=8 + static const int rgn_orders[] = { 0x148, 0x184, 0x814, 0x418, 0x841, 0x481 }; + int i; + + if (right) { + if (!PicoIn.regionOverride) { + for (i = 0; i < 6; i++) + if (rgn_orders[i] == PicoIn.autoRgnOrder) break; + if (i < 5) PicoIn.autoRgnOrder = rgn_orders[i+1]; + else PicoIn.regionOverride=1; + } + else + PicoIn.regionOverride <<= 1; + if (PicoIn.regionOverride > 8) + PicoIn.regionOverride = 8; + } else { + if (!PicoIn.regionOverride) { + for (i = 0; i < 6; i++) + if (rgn_orders[i] == PicoIn.autoRgnOrder) break; + if (i > 0) PicoIn.autoRgnOrder = rgn_orders[i-1]; + } + else + PicoIn.regionOverride >>= 1; + } +} + +static int mh_opt_misc(int id, int keys) +{ + switch (id) { + case MA_OPT_REGION: + region_prevnext(keys & PBTN_RIGHT); + break; + default: + break; + } + return 0; +} + +static int mh_restore_defaults(int id, int keys) +{ + emu_set_defconfig(); + menu_update_msg("defaults restored"); + return 1; +} + static const char *mgn_opt_region(int id, int *offs) { static const char *names[] = { "Auto", " Japan NTSC", " Japan PAL", " USA", " Europe" }; static const char *names_short[] = { "", " JP", " JP", " US", " EU" }; - int code = PicoRegionOverride; + int code = PicoIn.regionOverride; int u, i = 0; *offs = -6; @@ -713,7 +955,7 @@ static const char *mgn_opt_region(int id, int *offs) } else { strcpy(static_buff, "Auto:"); for (u = 0; u < 3; u++) { - code = (PicoAutoRgnOrder >> u*4) & 0xf; + code = (PicoIn.autoRgnOrder >> u*4) & 0xf; for (i = 0; code; code >>= 1, i++) ; strcat(static_buff, names_short[i]); @@ -722,37 +964,29 @@ static const char *mgn_opt_region(int id, int *offs) } } -static const char *mgn_saveloadcfg(int id, int *offs) -{ - static_buff[0] = 0; - if (config_slot != 0) - sprintf(static_buff, "[%i]", config_slot); - return static_buff; -} - -static const char *men_confirm_save[] = { "OFF", "writes", "loads", "both", NULL }; -static const char h_confirm_save[] = "Ask for confirmation when 
overwriting save,\n" - "loading state or both"; +static const char h_hotkeysvld[] = "Slot used for save/load by emulator hotkey"; static menu_entry e_menu_options[] = { - mee_range ("Save slot", MA_OPT_SAVE_SLOT, state_slot, 0, 9), - mee_range_cust("Frameskip", MA_OPT_FRAMESKIP, currentConfig.Frameskip, -1, 16, mgn_opt_fskip), mee_cust ("Region", MA_OPT_REGION, mh_opt_misc, mgn_opt_region), - mee_onoff ("Show FPS", MA_OPT_SHOW_FPS, currentConfig.EmuOpt, EOPT_SHOW_FPS), - mee_onoff ("Enable sound", MA_OPT_ENABLE_SOUND, currentConfig.EmuOpt, EOPT_EN_SOUND), - mee_cust ("Sound Quality", MA_OPT_SOUND_QUALITY, mh_opt_misc, mgn_opt_sound), - mee_enum_h ("Confirm savestate", MA_OPT_CONFIRM_STATES,currentConfig.confirm_save, men_confirm_save, h_confirm_save), mee_range ("", MA_OPT_CPU_CLOCKS, currentConfig.CPUclock, 20, 3200), - mee_handler ("[Display options]", menu_loop_gfx_options), - mee_handler ("[Sega/Mega CD options]", menu_loop_cd_options), + mee_range_h ("Hotkey save/load slot", MA_OPT_SAVE_SLOT, state_slot, 0, 9, h_hotkeysvld), + mee_handler ("Configure controls", menu_loop_keyconfig), + mee_label (""), + mee_handler ("Option profiles", menu_loop_profile_options), + mee_handler ("Interface options", menu_loop_ui_options), + mee_handler ("Display options", menu_loop_gfx_options), + mee_handler ("Sound options", menu_loop_snd_options), + mee_handler ("MD/Genesis/Pico options", menu_loop_md_options), + mee_handler (" Sega/Mega CD add-on", menu_loop_cd_options), #ifndef NO_32X - mee_handler ("[32X options]", menu_loop_32x_options), + mee_handler (" 32X add-on", menu_loop_32x_options), #endif - mee_handler ("[Advanced options]", menu_loop_adv_options), - mee_cust_nosave("Save global config", MA_OPT_SAVECFG, mh_saveloadcfg, mgn_saveloadcfg), - mee_cust_nosave("Save cfg for loaded game",MA_OPT_SAVECFG_GAME, mh_saveloadcfg, mgn_saveloadcfg), - mee_cust_nosave("Load cfg from profile", MA_OPT_LOADCFG, mh_saveloadcfg, mgn_saveloadcfg), +#ifndef NO_SMS + mee_handler 
("SG/SMS/GG options", menu_loop_sms_options), +#endif + mee_handler ("Advanced options", menu_loop_adv_options), + mee_handler ("Restore defaults", mh_restore_defaults), mee_end, }; @@ -761,10 +995,7 @@ static int menu_loop_options(int id, int keys) { static int sel = 0; - me_enable(e_menu_options, MA_OPT_SAVECFG_GAME, PicoGameLoaded); - me_enable(e_menu_options, MA_OPT_LOADCFG, config_slot != config_slot_current); - - me_loop(e_menu_options, &sel); + me_loop_d(e_menu_options, &sel, menu_draw_prep, NULL); return 0; } @@ -808,7 +1039,7 @@ static void draw_text_debug(const char *str, int skip, int from) str = p; for (line = from; line < g_menuscreen_h / me_sfont_h; line++) { - smalltext_out16(1, line * me_sfont_h, str, 0xffff); + smalltext_out16(1, line * me_sfont_h, str, PXMAKE(0xff, 0xff, 0xff)); while (*p && *p != '\n') p++; if (*p == 0) @@ -826,21 +1057,24 @@ static void draw_text_debug(const char *str, int skip, int from) static void draw_frame_debug(void) { char layer_str[48] = "layers: "; - if (PicoDrawMask & PDRAW_LAYERB_ON) memcpy(layer_str + 8, "B", 1); - if (PicoDrawMask & PDRAW_LAYERA_ON) memcpy(layer_str + 10, "A", 1); - if (PicoDrawMask & PDRAW_SPRITES_LOW_ON) memcpy(layer_str + 12, "spr_lo", 6); - if (PicoDrawMask & PDRAW_SPRITES_HI_ON) memcpy(layer_str + 19, "spr_hi", 6); - if (PicoDrawMask & PDRAW_32X_ON) memcpy(layer_str + 26, "32x", 4); + struct PicoVideo *pv = &Pico.video; + + if (!(pv->debug_p & PVD_KILL_B)) memcpy(layer_str + 8, "B", 1); + if (!(pv->debug_p & PVD_KILL_A)) memcpy(layer_str + 10, "A", 1); + if (!(pv->debug_p & PVD_KILL_S_LO)) memcpy(layer_str + 12, "spr_lo", 6); + if (!(pv->debug_p & PVD_KILL_S_HI)) memcpy(layer_str + 19, "spr_hi", 6); + if (!(pv->debug_p & PVD_KILL_32X)) memcpy(layer_str + 26, "32x", 4); pemu_forced_frame(1, 0); - make_bg(1); + make_bg(1, 1); - smalltext_out16(4, 1, "build: r" REVISION " "__DATE__ " " __TIME__ " " COMPILER, 0xffff); - smalltext_out16(4, g_menuscreen_h - me_sfont_h, layer_str, 0xffff); + 
smalltext_out16(4, 1, "build: r" REVISION " "__DATE__ " " __TIME__ " " COMPILER, PXMAKE(0xff, 0xff, 0xff)); + smalltext_out16(4, g_menuscreen_h - me_sfont_h, layer_str, PXMAKE(0xff, 0xff, 0xff)); } static void debug_menu_loop(void) { + struct PicoVideo *pv = &Pico.video; int inp, mode = 0; int spr_offs = 0, dumped = 0; char *tmp; @@ -848,6 +1082,10 @@ static void debug_menu_loop(void) while (1) { menu_draw_begin(1, 0); + g_screen_ptr = g_menuscreen_ptr; + g_screen_width = g_menuscreen_w; + g_screen_height = g_menuscreen_h; + g_screen_ppitch = g_menuscreen_pp; switch (mode) { case 0: tmp = PDebugMain(); @@ -855,21 +1093,23 @@ static void debug_menu_loop(void) draw_text_debug(tmp, 0, 0); if (dumped) { smalltext_out16(g_menuscreen_w - 6 * me_sfont_h, - g_menuscreen_h - me_mfont_h, "dumped", 0xffff); + g_menuscreen_h - me_mfont_h, "dumped", PXMAKE(0xff, 0xff, 0xff)); dumped = 0; } break; case 1: draw_frame_debug(); break; case 2: pemu_forced_frame(1, 0); - make_bg(1); - PDebugShowSpriteStats((unsigned short *)g_menuscreen_ptr + (g_menuscreen_h/2 - 240/2)*g_menuscreen_w + - g_menuscreen_w/2 - 320/2, g_menuscreen_w); + make_bg(1, 1); + PDebugShowSpriteStats((unsigned short *)g_menuscreen_ptr + + (g_menuscreen_h/2 - 240/2) * g_menuscreen_pp + + g_menuscreen_w/2 - 320/2, g_menuscreen_pp); break; - case 3: memset(g_menuscreen_ptr, 0, g_menuscreen_w * g_menuscreen_h * 2); - PDebugShowPalette(g_menuscreen_ptr, g_menuscreen_w); - PDebugShowSprite((unsigned short *)g_menuscreen_ptr + g_menuscreen_w*120 + g_menuscreen_w/2 + 16, - g_menuscreen_w, spr_offs); + case 3: menuscreen_memset_lines(g_menuscreen_ptr, 0, g_menuscreen_h); + PDebugShowPalette(g_menuscreen_ptr, g_menuscreen_pp); + PDebugShowSprite((unsigned short *)g_menuscreen_ptr + + g_menuscreen_pp * 120 + g_menuscreen_w / 2 + 16, + g_menuscreen_pp, spr_offs); draw_text_debug(PDebugSpriteList(), spr_offs, 6); break; case 4: tmp = PDebug32x(); @@ -901,16 +1141,16 @@ static void debug_menu_loop(void) } break; case 1: - if 
(inp & PBTN_LEFT) PicoDrawMask ^= PDRAW_LAYERB_ON; - if (inp & PBTN_RIGHT) PicoDrawMask ^= PDRAW_LAYERA_ON; - if (inp & PBTN_DOWN) PicoDrawMask ^= PDRAW_SPRITES_LOW_ON; - if (inp & PBTN_UP) PicoDrawMask ^= PDRAW_SPRITES_HI_ON; - if (inp & PBTN_MA2) PicoDrawMask ^= PDRAW_32X_ON; + if (inp & PBTN_LEFT) pv->debug_p ^= PVD_KILL_B; + if (inp & PBTN_RIGHT) pv->debug_p ^= PVD_KILL_A; + if (inp & PBTN_DOWN) pv->debug_p ^= PVD_KILL_S_LO; + if (inp & PBTN_UP) pv->debug_p ^= PVD_KILL_S_HI; + if (inp & PBTN_MA2) pv->debug_p ^= PVD_KILL_32X; if (inp & PBTN_MOK) { - PsndOut = NULL; // just in case - PicoSkipFrame = 1; + PicoIn.sndOut = NULL; // just in case + PicoIn.skipFrame = 1; PicoFrame(); - PicoSkipFrame = 0; + PicoIn.skipFrame = 0; while (inp & PBTN_MOK) inp = in_menu_wait_any(NULL, -1); } break; @@ -927,11 +1167,12 @@ static void debug_menu_loop(void) static void draw_frame_credits(void) { - smalltext_out16(4, 1, "build: " __DATE__ " " __TIME__, 0xe7fc); + smalltext_out16(4, 1, "build: " __DATE__ " " __TIME__, PXMAKE(0xe0, 0xff, 0xe0)); } static const char credits[] = - "PicoDrive v" VERSION " (c) notaz, 2006-2013\n\n\n" + "PicoDrive v" VERSION "\n" + "(c) notaz, 2006-2013; irixxxx, 2018-2024\n\n" "Credits:\n" "fDave: initial code\n" #ifdef EMU_C68K @@ -962,7 +1203,7 @@ static void menu_main_draw_status(void) { static time_t last_bat_read = 0; static int last_bat_val = -1; - unsigned short *bp = g_screen_ptr; + unsigned short *bp = g_menuscreen_ptr; int bat_h = me_mfont_h * 2 / 3; int i, u, w, wfill, batt_val; struct tm *tmp; @@ -976,7 +1217,7 @@ static void menu_main_draw_status(void) tmp = gmtime(&ltime); strftime(time_s, sizeof(time_s), "%H:%M", tmp); - text_out16(g_screen_width - me_mfont_w * 6, me_mfont_h + 2, time_s); + text_out16(g_menuscreen_w - me_mfont_w * 6, me_mfont_h + 2, time_s); if (ltime - last_bat_read > 10) { last_bat_read = ltime; @@ -989,25 +1230,27 @@ static void menu_main_draw_status(void) return; /* battery info */ - bp += (me_mfont_h * 2 + 2) * 
g_screen_width + g_screen_width - me_mfont_w * 3 - 3; + bp += (me_mfont_h * 2 + 2) * g_menuscreen_pp + g_menuscreen_w - me_mfont_w * 3 - 3; for (i = 0; i < me_mfont_w * 2; i++) bp[i] = menu_text_color; for (i = 0; i < me_mfont_w * 2; i++) - bp[i + g_screen_width * bat_h] = menu_text_color; + bp[i + g_menuscreen_pp * bat_h] = menu_text_color; for (i = 0; i <= bat_h; i++) - bp[i * g_screen_width] = - bp[i * g_screen_width + me_mfont_w * 2] = menu_text_color; + bp[i * g_menuscreen_pp] = + bp[i * g_menuscreen_pp + me_mfont_w * 2] = menu_text_color; for (i = 2; i < bat_h - 1; i++) - bp[i * g_screen_width - 1] = - bp[i * g_screen_width - 2] = menu_text_color; + bp[i * g_menuscreen_pp - 1] = + bp[i * g_menuscreen_pp - 2] = menu_text_color; w = me_mfont_w * 2 - 1; wfill = batt_val * w / 100; for (u = 1; u < bat_h; u++) for (i = 0; i < wfill; i++) - bp[(w - i) + g_screen_width * u] = menu_text_color; + bp[(w - i) + g_menuscreen_pp * u] = menu_text_color; } +static menu_entry e_menu_main[]; + static int main_menu_handler(int id, int keys) { const char *ret_name; @@ -1033,9 +1276,10 @@ static int main_menu_handler(int id, int keys) } break; case MA_MAIN_LOAD_ROM: + menu_w = menu_h = 0; rom_fname_reload = NULL; - ret_name = menu_loop_romsel(rom_fname_loaded, - sizeof(rom_fname_loaded), rom_exts, NULL); + ret_name = menu_loop_romsel_d(rom_fname_loaded, + sizeof(rom_fname_loaded), rom_exts, NULL, menu_draw_prep_selector); if (ret_name != NULL) { lprintf("selected file: %s\n", ret_name); rom_fname_reload = ret_name; @@ -1044,8 +1288,10 @@ static int main_menu_handler(int id, int keys) } break; case MA_MAIN_CHANGE_CD: - if (PicoAHW & PAHW_MCD) { - if (!Stop_CD()) + if (PicoIn.AHW & PAHW_MCD) { + // if cd is loaded, cdd_unload() triggers eject and + // returns 1, else we'll select and load new CD here + if (!cdd_unload()) menu_loop_tray(); return 1; } @@ -1072,23 +1318,88 @@ static int main_menu_handler(int id, int keys) return 0; } +static const char *mgn_picopage(int id, int 
*offs) +{ + strcpy(static_buff, " "); + sprintf(static_buff, "%i", PicoPicohw.page); + return static_buff; +} + +static int mh_picopage(int id, int keys) +{ + if (keys & (PBTN_LEFT|PBTN_RIGHT)) { // multi choice + PicoPicohw.page += (keys & PBTN_LEFT) ? -1 : 1; + if (PicoPicohw.page < 0) PicoPicohw.page = 6; + else if (PicoPicohw.page > 6) PicoPicohw.page = 0; + return 0; + } + return 1; +} + +static const char *mgn_saveloadcfg(int id, int *offs) +{ + strcpy(static_buff, " "); + if (config_slot != 0) + sprintf(static_buff, "[%i]", config_slot); + return static_buff; +} + +static int mh_saveloadcfg(int id, int keys) +{ + int ret; + + if (keys & (PBTN_LEFT|PBTN_RIGHT)) { // multi choice + config_slot += (keys & PBTN_LEFT) ? -1 : 1; + if (config_slot < 0) config_slot = 9; + else if (config_slot > 9) config_slot = 0; + me_enable(e_menu_main, MA_OPT_LOADCFG, PicoGameLoaded && config_slot != config_slot_current); + return 0; + } + + switch (id) { + case MA_OPT_SAVECFG: + case MA_OPT_SAVECFG_GAME: + if (emu_write_config(id == MA_OPT_SAVECFG_GAME ? 
1 : 0)) + menu_update_msg("config saved"); + else + menu_update_msg("failed to write config"); + break; + case MA_OPT_LOADCFG: + ret = emu_read_config(rom_fname_loaded, 1); + if (!ret) ret = emu_read_config(NULL, 1); + if (ret) menu_update_msg("config loaded"); + else menu_update_msg("failed to load config"); + break; + default: + return 0; + } + + return 1; +} + +static const char h_saveload[] = "Game options are overloading global options"; + static menu_entry e_menu_main[] = { - mee_label ("PicoDrive " VERSION), mee_label (""), mee_label (""), mee_label (""), - mee_handler_id("Resume game", MA_MAIN_RESUME_GAME, main_menu_handler), - mee_handler_id("Save State", MA_MAIN_SAVE_STATE, main_menu_handler), - mee_handler_id("Load State", MA_MAIN_LOAD_STATE, main_menu_handler), - mee_handler_id("Reset game", MA_MAIN_RESET_GAME, main_menu_handler), - mee_handler_id("Load new ROM/ISO", MA_MAIN_LOAD_ROM, main_menu_handler), - mee_handler_id("Change CD/ISO", MA_MAIN_CHANGE_CD, main_menu_handler), - mee_handler ("Change options", menu_loop_options), - mee_handler ("Configure controls", menu_loop_keyconfig), - mee_handler_id("Credits", MA_MAIN_CREDITS, main_menu_handler), + mee_label (""), + mee_label (""), + mee_label (""), + mee_label (""), + mee_label (""), + mee_label (""), + mee_handler_id("Resume Game", MA_MAIN_RESUME_GAME, main_menu_handler), + mee_handler_id("Save Game", MA_MAIN_SAVE_STATE, main_menu_handler), + mee_handler_id("Load Game", MA_MAIN_LOAD_STATE, main_menu_handler), + mee_handler_id("Reset Game", MA_MAIN_RESET_GAME, main_menu_handler), + mee_handler_id("Change CD", MA_MAIN_CHANGE_CD, main_menu_handler), + mee_cust_s_h ("Storyware page", MA_MAIN_PICO_PAGE, 0,mh_picopage, mgn_picopage, NULL), mee_handler_id("Patches / GameGenie",MA_MAIN_PATCHES, main_menu_handler), - mee_handler_id("Exit", MA_MAIN_EXIT, main_menu_handler), + mee_handler_id("Press CIRCLE button to begin", MA_MAIN_LOAD_ROM, main_menu_handler), + mee_cust_s_h ("Save Game Options", 
MA_OPT_SAVECFG_GAME, 0, mh_saveloadcfg, mgn_saveloadcfg, h_saveload), + mee_cust_s_h ("Load Game Options", MA_OPT_LOADCFG, 0, mh_saveloadcfg, mgn_saveloadcfg, h_saveload), mee_end, }; @@ -1096,16 +1407,21 @@ void menu_loop(void) { static int sel = 0; + + me_enable(e_menu_main, MA_MAIN_RESUME_GAME, PicoGameLoaded); me_enable(e_menu_main, MA_MAIN_SAVE_STATE, PicoGameLoaded); me_enable(e_menu_main, MA_MAIN_LOAD_STATE, PicoGameLoaded); me_enable(e_menu_main, MA_MAIN_RESET_GAME, PicoGameLoaded); - me_enable(e_menu_main, MA_MAIN_CHANGE_CD, PicoAHW & PAHW_MCD); - me_enable(e_menu_main, MA_MAIN_PATCHES, PicoPatches != NULL); + me_enable(e_menu_main, MA_MAIN_CHANGE_CD, PicoIn.AHW & PAHW_MCD); + me_enable(e_menu_main, MA_MAIN_PICO_PAGE, PicoIn.AHW & PAHW_PICO); + me_enable(e_menu_main, MA_MAIN_PATCHES, PicoPatches != NULL); + me_enable(e_menu_main, MA_OPT_SAVECFG_GAME, PicoGameLoaded); + me_enable(e_menu_main, MA_OPT_LOADCFG, PicoGameLoaded && config_slot != config_slot_current); menu_enter(PicoGameLoaded); in_set_config_int(0, IN_CFG_BLOCKING, 1); - me_loop_d(e_menu_main, &sel, NULL, menu_main_draw_status); + me_loop_d(e_menu_main, &sel, menu_draw_prep_selector, menu_main_draw_status); if (PicoGameLoaded) { if (engineState == PGS_Menu) @@ -1126,8 +1442,8 @@ static int mh_tray_load_cd(int id, int keys) const char *ret_name; rom_fname_reload = NULL; - ret_name = menu_loop_romsel(rom_fname_loaded, - sizeof(rom_fname_loaded), rom_exts, NULL); + ret_name = menu_loop_romsel_d(rom_fname_loaded, + sizeof(rom_fname_loaded), rom_exts, NULL, menu_draw_prep); if (ret_name == NULL) return 0; @@ -1158,7 +1474,7 @@ int menu_loop_tray(void) menu_enter(PicoGameLoaded); in_set_config_int(0, IN_CFG_BLOCKING, 1); - me_loop(e_menu_tray, &sel); + me_loop_d(e_menu_tray, &sel, menu_draw_prep, NULL); if (engineState != PGS_RestartRun) { engineState = PGS_RestartRun; @@ -1187,19 +1503,36 @@ void menu_update_msg(const char *msg) /* hidden options for config engine only */ static menu_entry 
e_menu_hidden[] = { - mee_onoff("Accurate sprites", MA_OPT_ACC_SPRITES, PicoOpt, 0x080), - mee_onoff("autoload savestates", MA_OPT_AUTOLOAD_SAVE, g_autostateld_opt, 1), + mee_onoff("Accurate sprites", MA_OPT_ACC_SPRITES, PicoIn.opt, POPT_ACC_SPRITES), +// mee_range("Save slot", MA_OPT_SAVE_SLOT, state_slot, 0, 9), + +// mee_enum ("Confirm savestate", MA_OPT_CONFIRM_STATES, currentConfig.confirm_save, men_confirm_save), + mee_onoff("autoload savestates", MA_OPT_AUTOLOAD_SAVE, g_autostateld_opt, 1), + mee_onoff("SDL fullscreen mode", MA_OPT_VOUT_FULL, plat_target.vout_fullscreen, 1), + mee_onoff("Emulate Z80", MA_OPT2_ENABLE_Z80, PicoIn.opt, POPT_EN_Z80), + mee_onoff("Emulate YM2612 (FM)", MA_OPT2_ENABLE_YM2612, PicoIn.opt, POPT_EN_FM), + mee_onoff("Disable YM2612 SSG-EG", MA_OPT2_DISABLE_YM_SSG,PicoIn.opt, POPT_DIS_FM_SSGEG), + mee_onoff("Enable YM2612 DAC noise", MA_OPT2_ENABLE_YM_DAC, PicoIn.opt, POPT_EN_FM_DAC), + mee_onoff("Emulate SN76496 (PSG)", MA_OPT2_ENABLE_SN76496,PicoIn.opt, POPT_EN_PSG), + mee_onoff("Scale/Rot. 
fx", MA_CDOPT_SCALEROT_CHIP,PicoIn.opt, POPT_EN_MCD_GFX), + mee_onoff("32X enabled", MA_32XOPT_ENABLE_32X, PicoIn.opt, POPT_EN_32X), mee_end, }; static menu_entry *e_menu_table[] = { e_menu_options, + e_menu_ui_options, + e_menu_snd_options, e_menu_gfx_options, e_menu_adv_options, + e_menu_md_options, e_menu_cd_options, #ifndef NO_32X e_menu_32x_options, +#endif +#ifndef NO_SMS + e_menu_sms_options, #endif e_menu_keyconfig, e_menu_hidden, diff --git a/platform/common/menu_pico.h b/platform/common/menu_pico.h index c5edde3d..59e6af80 100644 --- a/platform/common/menu_pico.h +++ b/platform/common/menu_pico.h @@ -12,10 +12,15 @@ typedef enum MA_MAIN_RESET_GAME, MA_MAIN_LOAD_ROM, MA_MAIN_CHANGE_CD, + MA_MAIN_PICO_PAGE, MA_MAIN_CONTROLS, MA_MAIN_CREDITS, MA_MAIN_PATCHES, MA_MAIN_EXIT, + MA_PROFILE_ACCURATE, + MA_PROFILE_BALANCED, + MA_PROFILE_FAST, + MA_PROFILE_BREAKING, MA_OPT_RENDERER, MA_OPT_SCALING, MA_OPT_VSCALING, @@ -28,7 +33,6 @@ typedef enum MA_OPT_INPUT_DEV0, MA_OPT_INPUT_DEV1, MA_OPT_REGION, - MA_OPT_SRAM_STATES, MA_OPT_CONFIRM_STATES, MA_OPT_SAVE_SLOT, MA_OPT_CPU_CLOCKS, @@ -41,29 +45,34 @@ typedef enum MA_OPT_INTERLACED, /* giz */ MA_OPT_TEARING_FIX, /* wiz */ MA_OPT_VOUT_MODE, + MA_OPT_VOUT_FULL, MA_OPT_AUTOLOAD_SAVE, + MA_OPT_SOUND_FILTER, + MA_OPT_SOUND_ALPHA, + MA_OPT_FM_FILTER, + MA_OPT_PICO_PEN, MA_OPT2_GAMMA, MA_OPT2_A_SN_GAMMA, MA_OPT2_DBLBUFF, /* giz */ MA_OPT2_VSYNC, MA_OPT2_ENABLE_Z80, MA_OPT2_ENABLE_YM2612, + MA_OPT2_DISABLE_YM_SSG, + MA_OPT2_ENABLE_YM_DAC, MA_OPT2_ENABLE_SN76496, - MA_OPT2_GZIP_STATES, + MA_OPT2_ENABLE_YM2413, + MA_OPT2_ENABLE_GGLCD, MA_OPT2_NO_LAST_ROM, MA_OPT2_RAMTIMINGS, /* gp2x */ - MA_OPT2_STATUS_LINE, /* psp */ MA_OPT2_NO_FRAME_LIMIT, /* psp */ MA_OPT2_DYNARECS, MA_OPT2_NO_SPRITE_LIM, MA_OPT2_NO_IDLE_LOOPS, + MA_OPT2_OVERCLOCK_M68K, + MA_OPT2_MAX_FRAMESKIP, + MA_OPT2_PWM_IRQ_OPT, MA_OPT2_DONE, - MA_OPT3_SCALE, /* psp (all OPT3) */ - MA_OPT3_HSCALE32, - MA_OPT3_HSCALE40, - MA_OPT3_PRES_NOSCALE, - MA_OPT3_PRES_SCALE43, - 
MA_OPT3_PRES_FULLSCR, + MA_OPT3_GAMMAA, /* psp (all OPT3) */ MA_OPT3_FILTERING, MA_OPT3_VSYNC, MA_OPT3_BLACKLVL, @@ -87,8 +96,14 @@ typedef enum MA_32XOPT_PWM, MA_32XOPT_MSH2_CYCLES, MA_32XOPT_SSH2_CYCLES, + MA_SMSOPT_HARDWARE, + MA_SMSOPT_MAPPER, + MA_SMSOPT_GHOSTING, + MA_SMSOPT_TMSPALETTE, MA_CTRL_PLAYER1, MA_CTRL_PLAYER2, + MA_CTRL_PLAYER3, + MA_CTRL_PLAYER4, MA_CTRL_EMU, MA_CTRL_TURBO_RATE, MA_CTRL_DEADZONE, diff --git a/platform/common/mp3.c b/platform/common/mp3.c index b2bcaf40..2f3846f5 100644 --- a/platform/common/mp3.c +++ b/platform/common/mp3.c @@ -21,33 +21,6 @@ unsigned short mpeg1_l3_bitrates[16] = { 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320 }; -int mp3_find_sync_word(const unsigned char *buf, int size) -{ - const unsigned char *p, *pe; - - /* find byte-aligned syncword - need 12 (MPEG 1,2) or 11 (MPEG 2.5) matching bits */ - for (p = buf, pe = buf + size - 3; p <= pe; p++) - { - int pn; - if (p[0] != 0xff) - continue; - pn = p[1]; - if ((pn & 0xf8) != 0xf8 || // currently must be MPEG1 - (pn & 6) == 0) { // invalid layer - p++; continue; - } - pn = p[2]; - if ((pn & 0xf0) < 0x20 || (pn & 0xf0) == 0xf0 || // bitrates - (pn & 0x0c) != 0) { // not 44kHz - continue; - } - - return p - buf; - } - - return -1; -} - static int try_get_bitrate(unsigned char *buf, int buf_size) { int offs1, offs = 0; @@ -115,7 +88,7 @@ void mp3_start_play(void *f_, int pos1024) cdda_out_pos = 0; decoder_active = 0; - if (!(PicoOpt & POPT_EN_MCD_CDDA) || f == NULL) // cdda disabled or no file? + if (!(PicoIn.opt & POPT_EN_MCD_CDDA) || f == NULL) // cdda disabled or no file? 
return; fseek(f, 0, SEEK_END); @@ -155,10 +128,10 @@ void mp3_start_play(void *f_, int pos1024) mp3dec_decode(mp3_current_file, &mp3_file_pos, mp3_file_len); } -void mp3_update(int *buffer, int length, int stereo) +void mp3_update(s32 *buffer, int length, int stereo) { - int length_mp3, shr = 0; - void (*mix_samples)(int *dest_buf, short *mp3_buf, int count) = mix_16h_to_32; + int length_mp3; + void (*mix_samples)(s32 *dest_buf, short *mp3_buf, int count, int fac16) = mix_16h_to_32_resample_stereo; if (mp3_current_file == NULL || mp3_file_pos >= mp3_file_len) return; /* no file / EOF */ @@ -166,35 +139,29 @@ void mp3_update(int *buffer, int length, int stereo) if (!decoder_active) return; - length_mp3 = length; - if (PsndRate <= 11025 + 100) { - mix_samples = mix_16h_to_32_s2; - length_mp3 <<= 2; shr = 2; - } - else if (PsndRate <= 22050 + 100) { - mix_samples = mix_16h_to_32_s1; - length_mp3 <<= 1; shr = 1; - } + length_mp3 = length * Pico.snd.cdda_mult >> 16; + if (!stereo) + mix_samples = mix_16h_to_32_resample_mono; if (1152 - cdda_out_pos >= length_mp3) { mix_samples(buffer, cdda_out_buffer + cdda_out_pos * 2, - length * 2); + length, Pico.snd.cdda_mult); cdda_out_pos += length_mp3; } else { - int ret, left = 1152 - cdda_out_pos; + int left = (1152 - cdda_out_pos) * Pico.snd.cdda_div >> 16; + int ret, sm = stereo ? 
2 : 1; if (left > 0) mix_samples(buffer, cdda_out_buffer + cdda_out_pos * 2, - (left >> shr) * 2); + left, Pico.snd.cdda_mult); ret = mp3dec_decode(mp3_current_file, &mp3_file_pos, mp3_file_len); if (ret == 0) { - cdda_out_pos = length_mp3 - left; - mix_samples(buffer + (left >> shr) * 2, - cdda_out_buffer, - (cdda_out_pos >> shr) * 2); + mix_samples(buffer + left * sm, cdda_out_buffer, + length-left, Pico.snd.cdda_mult); + cdda_out_pos = (length-left) * Pico.snd.cdda_mult >> 16; } else cdda_out_pos = 0; } diff --git a/platform/common/mp3.h b/platform/common/mp3.h index eb66db88..4a2b230b 100644 --- a/platform/common/mp3.h +++ b/platform/common/mp3.h @@ -12,8 +12,8 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len); extern unsigned short mpeg1_l3_bitrates[16]; #ifdef __GP2X__ -void mp3_update_local(int *buffer, int length, int stereo); -void mp3_start_play_local(void *f, int pos); +int _mp3dec_start(FILE *f, int fpos_start); +int _mp3dec_decode(FILE *f, int *file_pos, int file_len); #endif #endif // __COMMON_MP3_H__ diff --git a/platform/common/mp3_drmp3.c b/platform/common/mp3_drmp3.c new file mode 100644 index 00000000..f244c1b8 --- /dev/null +++ b/platform/common/mp3_drmp3.c @@ -0,0 +1,76 @@ +/* + * MP3 decoding using dr_mp3 + * (C) irixxxx, 2020 + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. + */ + +#include + +#include + +#ifdef USE_LIBRETRO_VFS +// Ugh, drmp3 tries to use wfopen on windows, which breaks libretro VFS... 
+#define __acrt_iob_func __acrt_iob_func2 +#define _wfopen_s(p,m) NULL +#define _wfopen(p,m) NULL +#endif +#ifdef __GP2X__ +#define mp3dec_decode _mp3dec_decode +#define mp3dec_start _mp3dec_start +#endif +#define DR_MP3_IMPLEMENTATION +#include "dr_libs/dr_mp3.h" +#include "mp3.h" + +static drmp3dec mp3dec; +static unsigned char mp3_input_buffer[2 * 1024]; + +int mp3dec_start(FILE *f, int fpos_start) +{ + drmp3dec_init(&mp3dec); + return 0; +} + +int mp3dec_decode(FILE *f, int *file_pos, int file_len) +{ + drmp3dec_frame_info info; + unsigned char *readPtr; + int bytesLeft; + int offset; // mp3 frame offset from readPtr + int len; + int retry = 3; + + do + { + if (*file_pos >= file_len) + return 1; /* EOF, nothing to do */ + + fseek(f, *file_pos, SEEK_SET); + bytesLeft = fread(mp3_input_buffer, 1, sizeof(mp3_input_buffer), f); + + offset = mp3_find_sync_word(mp3_input_buffer, bytesLeft); + if (offset < 0) { + lprintf("find_sync_word (%i/%i) err %i\n", + *file_pos, file_len, offset); + *file_pos = file_len; + return 1; // EOF + } + *file_pos += offset; + readPtr = mp3_input_buffer + offset; + bytesLeft -= offset; + + len = drmp3dec_decode_frame(&mp3dec, readPtr, bytesLeft, cdda_out_buffer, &info); + if (len > 0) // retrieved decoded data + *file_pos += info.frame_bytes; + else if (info.frame_bytes > 0) // no output but input consumed? + *file_pos += 1; // try to skip frame + else if (offset == 0) // bad frame? 
+ *file_pos += 1; // try resyncing + // else // truncated frame, try more data + } + while (len <= 0 && --retry > 0); + + return len <= 0; +} diff --git a/platform/common/mp3_helix.c b/platform/common/mp3_helix.c index b2785298..f3650bf2 100644 --- a/platform/common/mp3_helix.c +++ b/platform/common/mp3_helix.c @@ -9,21 +9,31 @@ #include #include +#include #include -#include -#include "helix/pub/mp3dec.h" +/*#include "helix/pub/mp3dec.h"*/ #include "mp3.h" -#include "lprintf.h" -static HMP3Decoder mp3dec; +#ifndef _MP3DEC_H +typedef void *HMP3Decoder; +#define ERR_MP3_INDATA_UNDERFLOW -1 +#define ERR_MP3_MAINDATA_UNDERFLOW -2 +#endif + +static void *mp3dec; static unsigned char mp3_input_buffer[2 * 1024]; #ifdef __GP2X__ -#define mp3_update mp3_update_local -#define mp3_start_play mp3_start_play_local +#define mp3dec_decode _mp3dec_decode +#define mp3dec_start _mp3dec_start #endif +static void *libhelix; +HMP3Decoder (*p_MP3InitDecoder)(void); +void (*p_MP3FreeDecoder)(HMP3Decoder); +int (*p_MP3Decode)(HMP3Decoder, unsigned char **, int *, short *, int); + int mp3dec_decode(FILE *f, int *file_pos, int file_len) { unsigned char *readPtr; @@ -31,6 +41,7 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) int offset; // mp3 frame offset from readPtr int had_err; int err = 0; + int retry = 3; do { @@ -51,7 +62,7 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) bytesLeft -= offset; had_err = err; - err = MP3Decode(mp3dec, &readPtr, &bytesLeft, cdda_out_buffer, 0); + err = p_MP3Decode(mp3dec, &readPtr, &bytesLeft, cdda_out_buffer, 0); if (err) { if (err == ERR_MP3_MAINDATA_UNDERFLOW && !had_err) { // just need another frame @@ -79,17 +90,38 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) } *file_pos += readPtr - mp3_input_buffer; } - while (0); + while (err && --retry > 0); - return 0; + return !!err; } int mp3dec_start(FILE *f, int fpos_start) { + if (libhelix == NULL) { + libhelix = dlopen("./libhelix.so", RTLD_NOW); + if (libhelix == NULL) 
{ + lprintf("mp3dec: load libhelix.so: %s\n", dlerror()); + return -1; + } + + p_MP3InitDecoder = dlsym(libhelix, "MP3InitDecoder"); + p_MP3FreeDecoder = dlsym(libhelix, "MP3FreeDecoder"); + p_MP3Decode = dlsym(libhelix, "MP3Decode"); + + if (p_MP3InitDecoder == NULL || p_MP3FreeDecoder == NULL + || p_MP3Decode == NULL) + { + lprintf("mp3dec: missing symbol(s) in libhelix.so\n"); + dlclose(libhelix); + libhelix = NULL; + return -1; + } + } + // must re-init decoder for new track if (mp3dec) - MP3FreeDecoder(mp3dec); - mp3dec = MP3InitDecoder(); + p_MP3FreeDecoder(mp3dec); + mp3dec = p_MP3InitDecoder(); return (mp3dec == 0) ? -1 : 0; } diff --git a/platform/common/mp3_libavcodec.c b/platform/common/mp3_libavcodec.c index 2d3c799c..6ed0fcd5 100644 --- a/platform/common/mp3_libavcodec.c +++ b/platform/common/mp3_libavcodec.c @@ -13,9 +13,17 @@ #include #include -#include "../libpicofe/lprintf.h" #include "mp3.h" +#if LIBAVCODEC_VERSION_MAJOR < 55 +#define AVCodecID CodecID +#define AV_CODEC_ID_MP3 CODEC_ID_MP3 +#define AV_CH_LAYOUT_STEREO CH_LAYOUT_STEREO +#define AV_SAMPLE_FMT_S16 SAMPLE_FMT_S16 +#define request_sample_fmt sample_fmt +#endif + +static void *libavcodec; static AVCodecContext *ctx; /* avoid compile time linking to libavcodec due to huge list of it's deps.. 
@@ -32,7 +40,8 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) int bytes_in; int bytes_out; int offset; - int len; + int len = -1; + int retry = 3; p_av_init_packet(&avpkt); @@ -51,6 +60,7 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) *file_pos = file_len; return 1; // EOF } + *file_pos += offset; // to avoid being flooded with "incorrect frame size" errors, // we must calculate and pass exact frame size - lame @@ -60,7 +70,6 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) if (offset > 0 && bytes_in - offset < frame_size) { // underflow - *file_pos += offset; continue; } @@ -80,47 +89,60 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) *file_pos, file_len, len); // attempt to skip the offending frame.. - *file_pos += offset + 1; - continue; - } - - *file_pos += offset + len; + *file_pos += 1; + } else + *file_pos += len; } - while (0); + while (len <= 0 && --retry > 0); - return 0; + return len <= 0; } int mp3dec_start(FILE *f, int fpos_start) { void (*avcodec_register_all)(void); - AVCodec *(*avcodec_find_decoder)(enum CodecID id); + AVCodec *(*avcodec_find_decoder)(enum AVCodecID id); +#if LIBAVCODEC_VERSION_MAJOR < 54 AVCodecContext *(*avcodec_alloc_context)(void); int (*avcodec_open)(AVCodecContext *avctx, AVCodec *codec); +#else + AVCodecContext *(*avcodec_alloc_context)(AVCodec *); + int (*avcodec_open)(AVCodecContext *avctx, AVCodec *codec, AVDictionary **); +#endif void (*av_free)(void *ptr); AVCodec *codec; - void *soh; int ret; if (ctx != NULL) return 0; +#if LIBAVCODEC_VERSION_MAJOR < 54 // either v52 or v53 should be ok - soh = dlopen("libavcodec.so.52", RTLD_NOW); - if (soh == NULL) - soh = dlopen("libavcodec.so.53", RTLD_NOW); - if (soh == NULL) { + if (libavcodec == NULL) + libavcodec = dlopen("libavcodec.so.52", RTLD_NOW); + if (libavcodec == NULL) + libavcodec = dlopen("libavcodec.so.53", RTLD_NOW); +#else + if (libavcodec == NULL) + libavcodec = dlopen("libavcodec.so", RTLD_NOW); +#endif + if 
(libavcodec == NULL) { lprintf("mp3dec: load libavcodec.so: %s\n", dlerror()); return -1; } - avcodec_register_all = dlsym(soh, "avcodec_register_all"); - avcodec_find_decoder = dlsym(soh, "avcodec_find_decoder"); - avcodec_alloc_context = dlsym(soh, "avcodec_alloc_context"); - avcodec_open = dlsym(soh, "avcodec_open"); - av_free = dlsym(soh, "av_free"); - p_av_init_packet = dlsym(soh, "av_init_packet"); - p_avcodec_decode_audio3 = dlsym(soh, "avcodec_decode_audio3"); + avcodec_register_all = dlsym(libavcodec, "avcodec_register_all"); + avcodec_find_decoder = dlsym(libavcodec, "avcodec_find_decoder"); +#if LIBAVCODEC_VERSION_MAJOR < 54 + avcodec_alloc_context = dlsym(libavcodec, "avcodec_alloc_context"); + avcodec_open = dlsym(libavcodec, "avcodec_open"); +#else + avcodec_alloc_context = dlsym(libavcodec, "avcodec_alloc_context3"); + avcodec_open = dlsym(libavcodec, "avcodec_open2"); +#endif + av_free = dlsym(libavcodec, "av_free"); + p_av_init_packet = dlsym(libavcodec, "av_init_packet"); + p_avcodec_decode_audio3 = dlsym(libavcodec, "avcodec_decode_audio3"); if (avcodec_register_all == NULL || avcodec_find_decoder == NULL || avcodec_alloc_context == NULL || avcodec_open == NULL @@ -128,7 +150,6 @@ int mp3dec_start(FILE *f, int fpos_start) || p_av_init_packet == NULL || p_avcodec_decode_audio3 == NULL) { lprintf("mp3dec: missing symbol(s) in libavcodec.so\n"); - dlclose(soh); return -1; } @@ -137,19 +158,30 @@ int mp3dec_start(FILE *f, int fpos_start) //avcodec_init(); avcodec_register_all(); - // AV_CODEC_ID_MP3 ? 
- codec = avcodec_find_decoder(CODEC_ID_MP3); + codec = avcodec_find_decoder(AV_CODEC_ID_MP3); if (codec == NULL) { lprintf("mp3dec: codec missing\n"); return -1; } +#if LIBAVCODEC_VERSION_MAJOR < 54 ctx = avcodec_alloc_context(); if (ctx == NULL) { lprintf("mp3dec: avcodec_alloc_context failed\n"); return -1; } +#else + ctx = avcodec_alloc_context(codec); + if (ctx == NULL) { + lprintf("mp3dec: avcodec_alloc_context failed\n"); + return -1; + } +#endif + ctx->request_channel_layout = AV_CH_LAYOUT_STEREO; + ctx->request_sample_fmt = AV_SAMPLE_FMT_S16; + ctx->sample_rate = 44100; +#if LIBAVCODEC_VERSION_MAJOR < 54 ret = avcodec_open(ctx, codec); if (ret < 0) { lprintf("mp3dec: avcodec_open failed: %d\n", ret); @@ -157,6 +189,14 @@ int mp3dec_start(FILE *f, int fpos_start) ctx = NULL; return -1; } - +#else + ret = avcodec_open(ctx, codec, NULL); + if (ret < 0) { + lprintf("mp3dec: avcodec_open failed: %d\n", ret); + av_free(ctx); + ctx = NULL; + return -1; + } +#endif return 0; } diff --git a/platform/common/mp3_minimp3.c b/platform/common/mp3_minimp3.c new file mode 100644 index 00000000..5b5a7f5b --- /dev/null +++ b/platform/common/mp3_minimp3.c @@ -0,0 +1,65 @@ +/* + * MP3 decoding using minimp3 + * (C) irixxxx, 2020 + * + * This work is licensed under the terms of MAME license. + * See COPYING file in the top-level directory. 
+ */ + +#include + +#include +#define MINIMP3_IMPLEMENTATION +#include "minimp3/minimp3.h" +#include "mp3.h" + +static mp3dec_t mp3dec; +static unsigned char mp3_input_buffer[2 * 1024]; + +int mp3dec_start(FILE *f, int fpos_start) +{ + mp3dec_init(&mp3dec); + return 0; +} + +int mp3dec_decode(FILE *f, int *file_pos, int file_len) +{ + mp3dec_frame_info_t info; + unsigned char *readPtr; + int bytesLeft; + int offset; // mp3 frame offset from readPtr + int len; + int retry = 3; + + do + { + if (*file_pos >= file_len) + return 1; /* EOF, nothing to do */ + + fseek(f, *file_pos, SEEK_SET); + bytesLeft = fread(mp3_input_buffer, 1, sizeof(mp3_input_buffer), f); + + offset = mp3_find_sync_word(mp3_input_buffer, bytesLeft); + if (offset < 0) { + lprintf("find_sync_word (%i/%i) err %i\n", + *file_pos, file_len, offset); + *file_pos = file_len; + return 1; // EOF + } + *file_pos += offset; + readPtr = mp3_input_buffer + offset; + bytesLeft -= offset; + + len = mp3dec_decode_frame(&mp3dec, readPtr, bytesLeft, cdda_out_buffer, &info); + if (len > 0) // retrieved decoded data + *file_pos += info.frame_bytes; + else if (info.frame_bytes > 0) // no output but input consumed? + *file_pos += 1; // try to skip frame + else if (offset == 0) // bad frame? 
+ *file_pos += 1; // try resyncing + // else // truncated frame, try more data + } + while (len <= 0 && --retry > 0); + + return len <= 0; +} diff --git a/platform/common/mp3_sync.c b/platform/common/mp3_sync.c new file mode 100644 index 00000000..509c259d --- /dev/null +++ b/platform/common/mp3_sync.c @@ -0,0 +1,27 @@ + +int mp3_find_sync_word(const unsigned char *buf, int size) +{ + const unsigned char *p, *pe; + + /* find byte-aligned syncword - need 12 (MPEG 1,2) or 11 (MPEG 2.5) matching bits */ + for (p = buf, pe = buf + size - 3; p <= pe; p++) + { + int pn; + if (p[0] != 0xff) + continue; + pn = p[1]; + if ((pn & 0xf8) != 0xf8 || // currently must be MPEG1 + (pn & 6) == 0) { // invalid layer + p++; continue; + } + pn = p[2]; + if ((pn & 0xf0) < 0x20 || (pn & 0xf0) == 0xf0 || // bitrates + (pn & 0x0c) != 0) { // not 44kHz + continue; + } + + return p - buf; + } + + return -1; +} diff --git a/platform/common/plat_sdl.c b/platform/common/plat_sdl.c index 600af45a..d1e833f3 100644 --- a/platform/common/plat_sdl.c +++ b/platform/common/plat_sdl.c @@ -1,6 +1,7 @@ /* * PicoDrive * (C) notaz, 2013 + * (C) irixxxx, 2020-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. 
@@ -9,45 +10,80 @@ #include #include "../libpicofe/input.h" +#include "../libpicofe/plat.h" #include "../libpicofe/plat_sdl.h" #include "../libpicofe/in_sdl.h" #include "../libpicofe/gl.h" #include "emu.h" #include "menu_pico.h" #include "input_pico.h" +#include "plat_sdl.h" #include "version.h" -static void *shadow_fb; +#include -static const struct in_default_bind in_sdl_defbinds[] = { - { SDLK_UP, IN_BINDTYPE_PLAYER12, GBTN_UP }, - { SDLK_DOWN, IN_BINDTYPE_PLAYER12, GBTN_DOWN }, - { SDLK_LEFT, IN_BINDTYPE_PLAYER12, GBTN_LEFT }, - { SDLK_RIGHT, IN_BINDTYPE_PLAYER12, GBTN_RIGHT }, - { SDLK_z, IN_BINDTYPE_PLAYER12, GBTN_A }, - { SDLK_x, IN_BINDTYPE_PLAYER12, GBTN_B }, - { SDLK_c, IN_BINDTYPE_PLAYER12, GBTN_C }, - { SDLK_a, IN_BINDTYPE_PLAYER12, GBTN_X }, - { SDLK_s, IN_BINDTYPE_PLAYER12, GBTN_Y }, - { SDLK_d, IN_BINDTYPE_PLAYER12, GBTN_Z }, - { SDLK_RETURN, IN_BINDTYPE_PLAYER12, GBTN_START }, - { SDLK_f, IN_BINDTYPE_PLAYER12, GBTN_MODE }, - { SDLK_ESCAPE, IN_BINDTYPE_EMU, PEVB_MENU }, - { SDLK_F1, IN_BINDTYPE_EMU, PEVB_STATE_SAVE }, - { SDLK_F2, IN_BINDTYPE_EMU, PEVB_STATE_LOAD }, - { SDLK_F3, IN_BINDTYPE_EMU, PEVB_SSLOT_PREV }, - { SDLK_F4, IN_BINDTYPE_EMU, PEVB_SSLOT_NEXT }, - { SDLK_F5, IN_BINDTYPE_EMU, PEVB_SWITCH_RND }, - { SDLK_F6, IN_BINDTYPE_EMU, PEVB_PICO_PPREV }, - { SDLK_F7, IN_BINDTYPE_EMU, PEVB_PICO_PNEXT }, - { SDLK_F8, IN_BINDTYPE_EMU, PEVB_PICO_SWINP }, - { SDLK_BACKSPACE, IN_BINDTYPE_EMU, PEVB_FF }, - { 0, 0, 0 } -}; +static void *shadow_fb; +static int shadow_size; +static struct area { int w, h; } area; + +static struct in_pdata in_sdl_platform_data; + +static int sound_rates[] = { 8000, 11025, 16000, 22050, 32000, 44100, 53000, -1 }; +struct plat_target plat_target = { .sound_rates = sound_rates }; + +#if defined __MIYOO__ +const char *plat_device = "miyoo"; +#elif defined __GCW0__ +const char *plat_device = "gcw0"; +#elif defined __RETROFW__ +const char *plat_device = "retrofw"; +#elif defined __DINGUX__ +const char *plat_device = "dingux"; 
+#else +const char *plat_device = ""; +#endif + +int plat_parse_arg(int argc, char *argv[], int *x) +{ +#if defined __OPENDINGUX__ + /* "-device <name>": only consume the option if its value is really present */ + if (*plat_device == '\0' && *x + 1 < argc && strcasecmp(argv[*x], "-device") == 0) { + plat_device = argv[++(*x)]; + return 0; + } +#endif + return 1; +} + +void plat_early_init(void) +{ +} + +int plat_target_init(void) +{ +#if defined __ODBETA__ + if (*plat_device == '\0') { + /* ODbeta should always have a device tree, get the model info from there */ + FILE *f = fopen("/proc/device-tree/compatible", "r"); + if (f) { + char buf[10]; + int c = fread(buf, 1, sizeof(buf), f); fclose(f); /* don't leak the handle */ + if (c >= 4 && strncmp(buf, "gcw,", 4) == 0) /* compare only bytes actually read */ + plat_device = "gcw0"; + } + } +#endif + return 0; +} + +void plat_target_finish(void) +{ +} /* YUV stuff */ static int yuv_ry[32], yuv_gy[32], yuv_by[32]; static unsigned char yuv_u[32 * 2], yuv_v[32 * 2]; +static unsigned char yuv_y[256]; +static struct uyvy { uint32_t y:8; uint32_t vyu:24; } yuv_uyvy[65536]; void bgr_to_uyvy_init(void) { @@ -78,56 +114,155 @@ void bgr_to_uyvy_init(void) v = 255; yuv_v[i + 32] = v; } + // valid Y range seems to be 16..235 + for (i = 0; i < 256; i++) { + yuv_y[i] = 16 + 219 * i / 32; + } + // everything combined into one large array for speed + for (i = 0; i < 65536; i++) { + int r = (i >> 11) & 0x1f, g = (i >> 6) & 0x1f, b = (i >> 0) & 0x1f; + int y = (yuv_ry[r] + yuv_gy[g] + yuv_by[b]) >> 16; + yuv_uyvy[i].y = yuv_y[y]; +#if CPU_IS_LE + yuv_uyvy[i].vyu = (yuv_v[r-y + 32] << 16) | (yuv_y[y] << 8) | yuv_u[b-y + 32]; +#else + yuv_uyvy[i].vyu = (yuv_v[b-y + 32] << 16) | (yuv_y[y] << 8) | yuv_u[r-y + 32]; +#endif + } } -void rgb565_to_uyvy(void *d, const void *s, int pixels) +void rgb565_to_uyvy(void *d, const void *s, int w, int h, int pitch, int dpitch, int x2) { - unsigned int *dst = d; - const unsigned short *src = s; - const unsigned char *yu = yuv_u + 32; - const unsigned char *yv = yuv_v + 32; - int r0, g0, b0, r1, g1, b1; - int y0, y1, u, v; + uint32_t *dst = d; + const uint16_t *src = s; + int i; - for 
(; pixels > 0; src += 2, dst++, pixels -= 2) - { - r0 = (src[0] >> 11) & 0x1f; - g0 = (src[0] >> 6) & 0x1f; - b0 = src[0] & 0x1f; - r1 = (src[1] >> 11) & 0x1f; - g1 = (src[1] >> 6) & 0x1f; - b1 = src[1] & 0x1f; - y0 = (yuv_ry[r0] + yuv_gy[g0] + yuv_by[b0]) >> 16; - y1 = (yuv_ry[r1] + yuv_gy[g1] + yuv_by[b1]) >> 16; - u = yu[b0 - y0]; - v = yv[r0 - y0]; - // valid Y range seems to be 16..235 - y0 = 16 + 219 * y0 / 31; - y1 = 16 + 219 * y1 / 31; - - *dst = (y1 << 24) | (v << 16) | (y0 << 8) | u; + if (x2) while (h--) { + for (i = w; i >= 4; src += 4, dst += 4, i -= 4) + { + struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; + struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; +#if CPU_IS_LE + dst[0] = (uyvy0->y << 24) | uyvy0->vyu; + dst[1] = (uyvy1->y << 24) | uyvy1->vyu; + dst[2] = (uyvy2->y << 24) | uyvy2->vyu; + dst[3] = (uyvy3->y << 24) | uyvy3->vyu; +#else + dst[0] = uyvy0->y | (uyvy0->vyu << 8); + dst[1] = uyvy1->y | (uyvy1->vyu << 8); + dst[2] = uyvy2->y | (uyvy2->vyu << 8); + dst[3] = uyvy3->y | (uyvy3->vyu << 8); +#endif + } + src += pitch - (w-i); + dst += (dpitch - 2*(w-i))/2; + } else while (h--) { + for (i = w; i >= 4; src += 4, dst += 2, i -= 4) + { + struct uyvy *uyvy0 = yuv_uyvy + src[0], *uyvy1 = yuv_uyvy + src[1]; + struct uyvy *uyvy2 = yuv_uyvy + src[2], *uyvy3 = yuv_uyvy + src[3]; +#if CPU_IS_LE + dst[0] = (uyvy1->y << 24) | uyvy0->vyu; + dst[1] = (uyvy3->y << 24) | uyvy2->vyu; +#else + dst[0] = uyvy1->y | (uyvy0->vyu << 8); + dst[1] = uyvy3->y | (uyvy2->vyu << 8); +#endif + } + src += pitch - (w-i); + dst += (dpitch - (w-i))/2; } } +static int clear_buf_cnt, clear_stat_cnt; + +static void resize_buffers(void) +{ + // make sure the shadow buffers are big enough in case of resize + if (shadow_size < g_menuscreen_w * g_menuscreen_h * 2) { + shadow_size = g_menuscreen_w * g_menuscreen_h * 2; + shadow_fb = realloc(shadow_fb, shadow_size); + g_menubg_ptr = realloc(g_menubg_ptr, shadow_size); + } +} + +void 
plat_video_set_size(int w, int h) +{ + if (area.w != w || area.h != h) { + area = (struct area) { w, h }; + if (plat_sdl_change_video_mode(w, h, 0) < 0) { + // failed, revert to original resolution + area = (struct area) { g_screen_width,g_screen_height }; + plat_sdl_change_video_mode(g_screen_width, g_screen_height, 0); + } + if (!plat_sdl_overlay && !plat_sdl_gl_active) { + g_screen_width = plat_sdl_screen->w; + g_screen_height = plat_sdl_screen->h; + g_screen_ppitch = plat_sdl_screen->pitch/2; + g_screen_ptr = plat_sdl_screen->pixels; + } else { + g_screen_width = w; + g_screen_height = h; + g_screen_ppitch = w; + } + } +} + +void plat_video_set_shadow(int w, int h) +{ + g_screen_width = w; + g_screen_height = h; + g_screen_ppitch = w; + g_screen_ptr = shadow_fb; +} + void plat_video_flip(void) { + resize_buffers(); + if (plat_sdl_overlay != NULL) { SDL_Rect dstrect = { 0, 0, plat_sdl_screen->w, plat_sdl_screen->h }; - SDL_LockYUVOverlay(plat_sdl_overlay); - rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, - g_screen_width * g_screen_height); + if (area.w <= plat_sdl_overlay->w && area.h <= plat_sdl_overlay->h) + rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, + area.w, area.h, g_screen_ppitch, + plat_sdl_overlay->pitches[0]/2, + plat_sdl_overlay->w >= 2*area.w); SDL_UnlockYUVOverlay(plat_sdl_overlay); SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect); } else if (plat_sdl_gl_active) { - gl_flip(shadow_fb, g_screen_width, g_screen_height); + gl_flip(shadow_fb, g_screen_ppitch, g_screen_height); } else { - // XXX: no locking, but should be fine with SDL_SWSURFACE? 
- SDL_Flip(plat_sdl_screen); + if (SDL_MUSTLOCK(plat_sdl_screen)) { + SDL_UnlockSurface(plat_sdl_screen); + SDL_Flip(plat_sdl_screen); + SDL_LockSurface(plat_sdl_screen); + } else + SDL_Flip(plat_sdl_screen); + g_screen_ppitch = plat_sdl_screen->pitch/2; g_screen_ptr = plat_sdl_screen->pixels; + plat_video_set_buffer(g_screen_ptr); + if (clear_buf_cnt) { + memset(g_screen_ptr, 0, plat_sdl_screen->pitch*plat_sdl_screen->h); + clear_buf_cnt--; + } + } + + // for overlay/gl modes buffer ptr may change on resize + if ((plat_sdl_overlay || plat_sdl_gl_active) && + (g_screen_ptr != shadow_fb || g_screen_ppitch != g_screen_width)) { + g_screen_ppitch = g_screen_width; + g_screen_ptr = shadow_fb; + plat_video_set_buffer(g_screen_ptr); + } + if (clear_stat_cnt) { + unsigned short *d = (unsigned short *)g_screen_ptr + g_screen_ppitch * g_screen_height; + int l = g_screen_ppitch * 8; + memset((int *)(d - l), 0, l * 2); + clear_stat_cnt--; } } @@ -135,19 +270,39 @@ void plat_video_wait_vsync(void) { } +void plat_video_clear_status(void) +{ + clear_stat_cnt = 3; // do it thrice in case of triple buffering +} + +void plat_video_clear_buffers(void) +{ + if (plat_sdl_overlay || plat_sdl_gl_active) + memset(shadow_fb, 0, g_menuscreen_w * g_menuscreen_h * 2); + else { + memset(g_screen_ptr, 0, plat_sdl_screen->pitch*plat_sdl_screen->h); + clear_buf_cnt = 3; // do it thrice in case of triple buffering + } +} + void plat_video_menu_enter(int is_rom_loaded) { - plat_sdl_change_video_mode(g_menuscreen_w, g_menuscreen_h, 0); - g_screen_ptr = shadow_fb; + if (SDL_MUSTLOCK(plat_sdl_screen)) + SDL_UnlockSurface(plat_sdl_screen); } void plat_video_menu_begin(void) { - if (plat_sdl_overlay != NULL || plat_sdl_gl_active) { + plat_sdl_change_video_mode(g_menuscreen_w, g_menuscreen_h, 1); + resize_buffers(); + if (plat_sdl_overlay || plat_sdl_gl_active) { + g_menuscreen_pp = g_menuscreen_w; g_menuscreen_ptr = shadow_fb; } else { - SDL_LockSurface(plat_sdl_screen); + if 
(SDL_MUSTLOCK(plat_sdl_screen)) + SDL_LockSurface(plat_sdl_screen); + g_menuscreen_pp = plat_sdl_screen->pitch / 2; g_menuscreen_ptr = plat_sdl_screen->pixels; } } @@ -159,21 +314,24 @@ void plat_video_menu_end(void) { 0, 0, plat_sdl_screen->w, plat_sdl_screen->h }; SDL_LockYUVOverlay(plat_sdl_overlay); - rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, - g_menuscreen_w * g_menuscreen_h); + if (g_menuscreen_w <= plat_sdl_overlay->w && g_menuscreen_h <= plat_sdl_overlay->h) + rgb565_to_uyvy(plat_sdl_overlay->pixels[0], shadow_fb, + g_menuscreen_w, g_menuscreen_h, g_menuscreen_pp, + plat_sdl_overlay->pitches[0]/2, + plat_sdl_overlay->w >= 2 * g_menuscreen_w); SDL_UnlockYUVOverlay(plat_sdl_overlay); SDL_DisplayYUVOverlay(plat_sdl_overlay, &dstrect); } else if (plat_sdl_gl_active) { - gl_flip(g_menuscreen_ptr, g_menuscreen_w, g_menuscreen_h); + gl_flip(g_menuscreen_ptr, g_menuscreen_pp, g_menuscreen_h); } else { - SDL_UnlockSurface(plat_sdl_screen); + if (SDL_MUSTLOCK(plat_sdl_screen)) + SDL_UnlockSurface(plat_sdl_screen); SDL_Flip(plat_sdl_screen); } g_menuscreen_ptr = NULL; - } void plat_video_menu_leave(void) @@ -182,19 +340,56 @@ void plat_video_menu_leave(void) void plat_video_loop_prepare(void) { - plat_sdl_change_video_mode(g_screen_width, g_screen_height, 0); + // take over any new vout settings + plat_sdl_change_video_mode(0, 0, 0); + area.w = g_menuscreen_w, area.h = g_menuscreen_h; + resize_buffers(); - if (plat_sdl_overlay != NULL || plat_sdl_gl_active) { + // switch over to scaled output if available, but keep the aspect ratio + if (plat_sdl_overlay || plat_sdl_gl_active) { + if (g_menuscreen_w * 240 >= g_menuscreen_h * 320) { + g_screen_width = (240 * g_menuscreen_w/g_menuscreen_h) & ~1; + g_screen_height= 240; + } else { + g_screen_width = 320; + g_screen_height= (320 * g_menuscreen_h/g_menuscreen_w) & ~1; + } + g_screen_ppitch = g_screen_width; g_screen_ptr = shadow_fb; } else { - SDL_LockSurface(plat_sdl_screen); + g_screen_width = 
plat_sdl_screen->w; + g_screen_height = plat_sdl_screen->h; + g_screen_ppitch = plat_sdl_screen->pitch/2; + if (SDL_MUSTLOCK(plat_sdl_screen)) + SDL_LockSurface(plat_sdl_screen); g_screen_ptr = plat_sdl_screen->pixels; } + + plat_video_set_size(g_screen_width, g_screen_height); + plat_video_set_buffer(g_screen_ptr); } -void plat_early_init(void) +static void plat_sdl_resize(int w, int h) { + // take over new settings + if (plat_sdl_screen->w != area.w || plat_sdl_screen->h != area.h) { +#if defined(__OPENDINGUX__) + if (currentConfig.vscaling != EOPT_SCALE_HW && + plat_sdl_screen->w == 320 && + plat_sdl_screen->h == 480) { + g_menuscreen_h = 240; + g_menuscreen_w = 320; + + } else +#endif + { + g_menuscreen_h = plat_sdl_screen->h; + g_menuscreen_w = plat_sdl_screen->w; + } + resize_buffers(); + rendstatus_old = -1; + } } static void plat_sdl_quit(void) @@ -205,27 +400,31 @@ static void plat_sdl_quit(void) void plat_init(void) { - int shadow_size; int ret; ret = plat_sdl_init(); if (ret != 0) exit(1); +#if defined(__OPENDINGUX__) + // opendingux on JZ47x0 may falsely report a HW overlay, fix to window + plat_target.vout_method = 0; +#endif plat_sdl_quit_cb = plat_sdl_quit; + plat_sdl_resize_cb = plat_sdl_resize; + SDL_ShowCursor(0); SDL_WM_SetCaption("PicoDrive " VERSION, NULL); - g_menuscreen_w = plat_sdl_screen->w; - g_menuscreen_h = plat_sdl_screen->h; + g_menuscreen_pp = g_menuscreen_w; g_menuscreen_ptr = NULL; shadow_size = g_menuscreen_w * g_menuscreen_h * 2; if (shadow_size < 320 * 480 * 2) shadow_size = 320 * 480 * 2; - shadow_fb = malloc(shadow_size); - g_menubg_ptr = malloc(shadow_size); + shadow_fb = calloc(1, shadow_size); + g_menubg_ptr = calloc(1, shadow_size); if (shadow_fb == NULL || g_menubg_ptr == NULL) { fprintf(stderr, "OOM\n"); exit(1); @@ -233,12 +432,21 @@ void plat_init(void) g_screen_width = 320; g_screen_height = 240; + g_screen_ppitch = 320; g_screen_ptr = shadow_fb; - in_sdl_init(in_sdl_defbinds, plat_sdl_event_handler); + 
plat_target_setup_input(); + in_sdl_platform_data.defbinds = in_sdl_defbinds, + in_sdl_platform_data.kmap_size = in_sdl_key_map_sz, + in_sdl_platform_data.key_map = in_sdl_key_map, + in_sdl_platform_data.jmap_size = in_sdl_joy_map_sz, + in_sdl_platform_data.joy_map = in_sdl_joy_map, + in_sdl_platform_data.key_names = in_sdl_key_names, + in_sdl_init(&in_sdl_platform_data, plat_sdl_event_handler); in_probe(); bgr_to_uyvy_init(); + linux_menu_init(); } void plat_finish(void) diff --git a/platform/common/plat_sdl.h b/platform/common/plat_sdl.h new file mode 100644 index 00000000..cecdb2b3 --- /dev/null +++ b/platform/common/plat_sdl.h @@ -0,0 +1,10 @@ + +extern const struct in_default_bind *in_sdl_defbinds; +extern const struct menu_keymap *in_sdl_key_map; +extern const int in_sdl_key_map_sz; +extern const struct menu_keymap *in_sdl_joy_map; +extern const int in_sdl_joy_map_sz; +extern const char * const *in_sdl_key_names; +extern const char *plat_device; + +void linux_menu_init(void); diff --git a/platform/common/upscale.c b/platform/common/upscale.c new file mode 100644 index 00000000..0aadbb19 --- /dev/null +++ b/platform/common/upscale.c @@ -0,0 +1,609 @@ +/* + * upscale.c image upscaling + * + * This file contains upscalers for picodrive. + * + * scaler types: + * nn: nearest neighbour + * snn: "smoothed" nearest neighbour (see below) + * bln: n-level-bilinear with n quantized weights + * quantization: 0: a<1/(2*n), 1/n: 1/(2*n)<=a<3/(2*n), etc + * currently n=2, n=4 are implemented (there's n=8 mixing, but no filters) + * [NB this has been brought to my attn, which is probably the same as bl2: + * https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1] + * + * "smoothed" nearest neighbour: uses the average of the source pixels if no + * source pixel covers more than 65% of the result pixel. It definitely + * looks better than nearest neighbour and is still quite fast. 
It creates + * a sharper look than a bilinear filter, at the price of some visible jags + * on diagonal edges. + * + * example scaling modes: + * 256x_Y_ -> 320x_Y_, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (NTSC 7% aspect err) + * 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (PAL 7% aspect err) + * 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (PAL 7% etc etc...) + * 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 + * + * (C) 2021 irixxxx + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - MAME license. + */ + +#include "upscale.h" + +/* X x Y -> X*5/4 x Y */ +void upscale_clut_nn_x_4_5(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_nn_4_5(di, ds, si, ss, width, f_nop); + } +} + +void upscale_rgb_nn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_nn_4_5(di, ds, si, ss, width, f_pal); + } +} + +void upscale_rgb_snn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_snn_4_5(di, ds, si, ss, width, f_pal); + } +} + +void upscale_rgb_bl2_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_bl2_4_5(di, ds, si, ss, width, f_pal); + } +} + +void upscale_rgb_bl4_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_bl4_4_5(di, ds, si, ss, width, f_pal); + } +} + +/* X x Y -> X*5/4 x Y*17/16 */ +void upscale_clut_nn_x_4_5_y_16_17(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) +{ + int swidth = width * 5/4; + int y, j; + + for (y = 0; y < height; y += 
16) { + for (j = 0; j < 8; j++) { + h_upscale_nn_4_5(di, ds, si, ss, width, f_nop); + } + di += ds; + for (j = 0; j < 8; j++) { + h_upscale_nn_4_5(di, ds, si, ss, width, f_nop); + } + + di -= 9*ds; + v_copy(&di[0], &di[-ds], swidth, f_nop); + di += 9*ds; + } +} + +void upscale_rgb_nn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 5/4; + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_upscale_nn_4_5(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 8; j++) { + h_upscale_nn_4_5(di, ds, si, ss, width, f_pal); + } + + di -= 9*ds; + v_copy(&di[0], &di[-ds], swidth, f_nop); + di += 9*ds; + } +} + +void upscale_rgb_snn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 5/4; + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_upscale_snn_4_5(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 8; j++) { + h_upscale_snn_4_5(di, ds, si, ss, width, f_pal); + } + + /* mix lines 6-8 */ + di -= 9*ds; + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_05, f_nop); + v_mix(&di[-ds], &di[-2*ds], &di[-ds], swidth, p_05, f_nop); + v_mix(&di[ ds], &di[ ds], &di[ 2*ds], swidth, p_05, f_nop); + di += 9*ds; + } +} + +void upscale_rgb_bl2_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 5/4; + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 4; j++) { + h_upscale_bl2_4_5(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 12; j++) { + h_upscale_bl2_4_5(di, ds, si, ss, width, f_pal); + } + /* mix lines 3-10 */ + di -= 13*ds; + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_05, f_nop); + for (j = 0; j < 7; j++) { + di += ds; + v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop); + } + di += 6*ds; + } +} + +void upscale_rgb_bl4_x_4_5_y_16_17(u16 
*__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 5/4; + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 2; j++) { + h_upscale_bl4_4_5(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 14; j++) { + h_upscale_bl4_4_5(di, ds, si, ss, width, f_pal); + } + di -= 15*ds; + /* mixing line 2: line 1 = -ds, line 2 = +ds */ + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_025, f_nop); + di += ds; + /* mixing lines 3-5: line n-1 = 0, line n = +ds */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_025, f_nop); + di += ds; + } + /* mixing lines 6-9 */ + for (j = 0; j < 4; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop); + di += ds; + } + /* mixing lines 10-13 */ + for (j = 0; j < 4; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_075, f_nop); + di += ds; + } + /* lines 14-16, already in place */ + di += 3*ds; + } +} + +/* "classic" upscaler as found in several emulators. It's really more like a + * x*4/3, y*16/15 upscaler, with an additional 5th row/17th line just inserted + * from the source image. That gives nice n/4,n/16 alpha values plus better + * symmetry in each block and avoids "borrowing" a row/line between blocks. 
+ */ +void upscale_rgb_bln_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 5/4; + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 4; j++) { + h_upscale_bln_4_5(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 12; j++) { + h_upscale_bln_4_5(di, ds, si, ss, width, f_pal); + } + di -= 13*ds; + /* mixing line 4: line 3 = -ds, line 4 = +ds */ + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_025, f_nop); + di += ds; + /* mixing lines 5-6: line n-1 = 0, line n = +ds */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_025, f_nop); + di += ds; + } + /* mixing line 7-9 */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop); + di += ds; + } + /* mixing lines 10-12 */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_075, f_nop); + di += ds; + } + /* lines 13-16, already in place */ + di += 4*ds; + } +} + +/* experimental 8 level bilinear for quality assessment; honours width/height like the other x_4_5_y_16_17 scalers */ +void upscale_rgb_bl8_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 5/4; + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 2; j++) { + h_upscale_bl8_4_5(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 14; j++) { + h_upscale_bl8_4_5(di, ds, si, ss, width, f_pal); + } + di -= 15*ds; + /* mixing line 2: line 2 = -ds, line 3 = +ds */ + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_0125, f_nop); + di += ds; + /* mixing line 3: line 3 = 0, line 4 = +ds */ + v_mix(&di[0], &di[0], &di[ds], swidth, p_0125, f_nop); + di += ds; + /* mixing lines 4-5: line n-1 = 0, line n = +ds */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_025, f_nop); + di += ds; + } + /* mixing lines 6-7 */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_0375, f_nop); + di += ds; + } + /* mixing lines 8-9 */ + for (j = 0; 
j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_05, f_nop); + di += ds; + } + /* mixing lines 10-11 */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_0625, f_nop); + di += ds; + } + /* mixing lines 12-13 */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_075, f_nop); + di += ds; + } + /* mixing lines 14-15 */ + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[0], &di[ds], swidth, p_0875, f_nop); + di += ds; + } + /* line 16, already in place */ + di += ds; + } +} + +/* X x Y -> X x Y*17/16 */ +void upscale_clut_nn_y_16_17(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) +{ + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_nop); + } + di += ds; + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_nop); + } + + di -= 9*ds; + v_copy(&di[0], &di[-ds], width, f_nop); + di += 9*ds; + } +} + +void upscale_rgb_nn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + + di -= 9*ds; + v_copy(&di[0], &di[-ds], width, f_nop); + di += 9*ds; + } +} + +void upscale_rgb_snn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 8; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + + /* mix lines 6-8 */ + di -= 9*ds; + v_mix(&di[0], &di[-ds], &di[ds], width, p_05, f_nop); + v_mix(&di[-ds], &di[-2*ds], &di[-ds], width, p_05, f_nop); + v_mix(&di[ ds], &di[ ds], &di[ 2*ds], width, p_05, f_nop); + di += 9*ds; + } +} + +void upscale_rgb_bl2_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, 
int width, int height, u16 *pal) +{ + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 4; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 12; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + /* mix lines 4-11 */ + di -= 13*ds; + v_mix(&di[0], &di[-ds], &di[ds], width, p_05, f_nop); + for (j = 0; j < 7; j++) { + di += ds; + v_mix(&di[0], &di[0], &di[ds], width, p_05, f_nop); + } + di += 6*ds; + } +} + +void upscale_rgb_bl4_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y, j; + + for (y = 0; y < height; y += 16) { + for (j = 0; j < 2; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + di += ds; + for (j = 0; j < 14; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + di -= 15*ds; + /* mixing line 2: line 1 = -ds, line 2 = +ds */ + v_mix(&di[0], &di[-ds], &di[ds], width, p_025, f_nop); + di += ds; + /* mixing lines 3-5: line n-1 = 0, line n = +ds */ + for (j = 0; j < 3; j++) { + v_mix(&di[0], &di[0], &di[ds], width, p_025, f_nop); + di += ds; + } + /* mixing lines 6-9 */ + for (j = 0; j < 4; j++) { + v_mix(&di[0], &di[0], &di[ds], width, p_05, f_nop); + di += ds; + } + /* mixing lines 10-13 */ + for (j = 0; j < 4; j++) { + v_mix(&di[0], &di[0], &di[ds], width, p_075, f_nop); + di += ds; + } + /* lines 14-16, already in place */ + di += 3*ds; + } +} + +/* X x Y -> X*2/1 x Y, e.g. 
for X 160->320 (GG) */ +void upscale_clut_nn_x_1_2(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_nn_1_2(di, ds, si, ss, width, f_nop); + } +} + +void upscale_rgb_nn_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_nn_1_2(di, ds, si, ss, width, f_pal); + } +} + +void upscale_rgb_bl2_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y; + + for (y = 0; y < height; y++) { + h_upscale_bl2_1_2(di, ds, si, ss, width, f_pal); + } +} + +/* X x Y -> X*2/1 x Y*5/3 (GG) */ +void upscale_clut_nn_x_1_2_y_3_5(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) +{ + int swidth = width * 2; + int y, j; + + for (y = 0; y < height; y += 3) { + /* lines 0,2,4 */ + for (j = 0; j < 3; j++) { + h_upscale_nn_1_2(di, ds, si, ss, width, f_nop); + di += ds; + } + /* lines 1,3 */ + di -= 5*ds; + for (j = 0; j < 2; j++) { + v_copy(&di[0], &di[-ds], swidth, f_nop); + di += 2*ds; + } + } +} + +void upscale_rgb_nn_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 2; + int y, j; + + for (y = 0; y < height; y += 3) { + for (j = 0; j < 3; j++) { + h_upscale_nn_1_2(di, ds, si, ss, width, f_pal); + di += ds; + } + di -= 5*ds; + for (j = 0; j < 2; j++) { + v_copy(&di[0], &di[-ds], swidth, f_nop); + di += 2*ds; + } + } +} + +void upscale_rgb_bl2_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 2; + int y, j; + + for (y = 0; y < height; y += 3) { + for (j = 0; j < 3; j++) { + h_upscale_bl2_1_2(di, ds, si, ss, width, f_pal); + di += ds; + } + di -= 5*ds; + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[-ds], &di[ds], swidth, p_05, f_nop); + di += 2*ds; + } + } +} + +void 
upscale_rgb_bl4_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int swidth = width * 2; + int y, j, d; + + /* for 1st block backwards reference virtually duplicate source line 0 */ + for (y = 0, d = 2*ds; y < height; y += 3, d = -ds) { + di += 2*ds; + for (j = 0; j < 3; j++) { + h_upscale_bl2_1_2(di, ds, si, ss, width, f_pal); + } + di -= 5*ds; + v_mix(&di[0], &di[d ], &di[2*ds], swidth, p_05, f_nop); /*-1+0 */ + di += ds; + v_mix(&di[0], &di[ds], &di[2*ds], swidth, p_075, f_nop);/* 0+1 */ + di += ds; + v_mix(&di[0], &di[ 0], &di[ ds], swidth, p_025, f_nop);/* 0+1 */ + di += ds; + v_mix(&di[0], &di[ 0], &di[ ds], swidth, p_05, f_nop); /* 1+2 */ + di += 2*ds; + } +} + +/* X x Y -> X x Y*5/3, e.g. for Y 144->240 (GG) */ +void upscale_clut_nn_y_3_5(u8 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height) +{ + int y, j; + + for (y = 0; y < height; y += 3) { + /* lines 0,2,4 */ + for (j = 0; j < 3; j++) { + h_copy(di, ds, si, ss, width, f_nop); + di += ds; + } + /* lines 1,3 */ + di -= 5*ds; + for (j = 0; j < 2; j++) { + v_copy(&di[0], &di[-ds], width, f_nop); + di += 2*ds; + } + } +} + +void upscale_rgb_nn_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y, j; + + for (y = 0; y < height; y += 3) { + for (j = 0; j < 3; j++) { + h_copy(di, ds, si, ss, width, f_pal); + di += ds; + } + di -= 5*ds; + for (j = 0; j < 2; j++) { + v_copy(&di[0], &di[-ds], width, f_nop); + di += 2*ds; + } + } +} + +void upscale_rgb_bl2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal) +{ + int y, j; + + for (y = 0; y < height; y += 3) { + for (j = 0; j < 3; j++) { + h_copy(di, ds, si, ss, width, f_pal); + di += ds; + } + di -= 5*ds; + for (j = 0; j < 2; j++) { + v_mix(&di[0], &di[-ds], &di[ds], width, p_05, f_nop); + di += 2*ds; + } + } +} + +void upscale_rgb_bl4_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, 
int width, int height, u16 *pal) +{ + int y, j, d; + + /* for 1st block backwards reference virtually duplicate source line 0 */ + for (y = 0, d = 2*ds; y < height; y += 3, d = -ds) { + di += 2*ds; + for (j = 0; j < 3; j++) { + h_copy(di, ds, si, ss, width, f_pal); + } + di -= 5*ds; + v_mix(&di[0], &di[d ], &di[2*ds], width, p_05, f_nop); /*-1+0 */ + di += ds; + v_mix(&di[0], &di[ds], &di[2*ds], width, p_075, f_nop);/* 0+1 */ + di += ds; + v_mix(&di[0], &di[ 0], &di[ ds], width, p_025, f_nop);/* 0+1 */ + di += ds; + v_mix(&di[0], &di[ 0], &di[ ds], width, p_05, f_nop); /* 1+2 */ + di += 2*ds; + } +} + diff --git a/platform/common/upscale.h b/platform/common/upscale.h new file mode 100644 index 00000000..74e0c039 --- /dev/null +++ b/platform/common/upscale.h @@ -0,0 +1,650 @@ +/* + * upscale.h image upscaling + * + * This file contains upscalers for picodrive. + * + * scaler types: + * nn: nearest neighbour + * snn: "smoothed" nearest neighbour (see below) + * bln: n-level-bilinear with n quantized weights + * quantization: 0: a<1/(2*n), 1/n: 1/(2*n)<=a<3/(2*n), etc + * currently n=2, n=4 are implemented (there's n=8 mixing, but no filters) + * [NB this has been brought to my attn, which is probably the same as bl2: + * https://www.drdobbs.com/image-scaling-with-bresenham/184405045?pgno=1] + * + * "smoothed" nearest neighbour: uses the average of the source pixels if no + * source pixel covers more than 65% of the result pixel. It definitely + * looks better than nearest neighbour and is still quite fast. It creates + * a sharper look than a bilinear filter, at the price of some visible jags + * on diagonal edges. + * + * example scaling modes: + * 256x_Y_ -> 320x_Y_, H32/mode 4, PAR 5:4, for PAL DAR 4:3 (NTSC 7% aspect err) + * 256x224 -> 320x240, H32/mode 4, PAR 5:4, for NTSC DAR 4:3 (PAL 7% aspect err) + * 320x224 -> 320x240, PAR 1:1, for NTSC, DAR 4:3 (PAL 7% etc etc...) 
+ * 160x144 -> 320x240: GG, PAR 6:5, scaling to 320x240 for DAR 4:3 + * + * + * (C) 2021 irixxxx + * + * This work is licensed under the terms of any of these licenses + * (at your option): + * - GNU GPL, version 2 or later. + * - MAME license. + * See COPYING file in the top-level directory. + */ +#include + + +/* LSB of all colors in 1 or 2 pixels */ +#if defined(USE_BGR555) +#define PXLSB 0x04210421 +#else +#define PXLSB 0x08210821 +#endif + +/* RGB565 pixel mixing, see https://www.compuphase.com/graphic/scale3.htm and + http://blargg.8bitalley.com/info/rgb_mixing.html */ +/* 2-level mixing. NB blargg version isn't 2-pixel-at-once safe for RGB565 */ +//#define p_05(d,p1,p2) d=(((p1)+(p2) + ( ((p1)^(p2))&PXLSB))>>1) // round up +//#define p_05(d,p1,p2) d=(((p1)+(p2) - ( ((p1)^(p2))&PXLSB))>>1) // round down +#define p_05(d,p1,p2) d=(((p1)&(p2)) + ((((p1)^(p2))&~PXLSB)>>1)) +/* 4-level mixing, 2 times slower */ +// 1/4*p1 + 3/4*p2 = 1/2*(1/2*(p1+p2) + p2) +#define p_025(d,p1,p2) p_05(t, p1, p2); p_05( d, t, p2) +#define p_075(d,p1,p2) p_025(d,p2,p1) +/* 8-level mixing, 3 times slower */ +// 1/8*p1 + 7/8*p2 = 1/2*(1/2*(1/2*(p1+p2) + p2) + p2) +#define p_0125(d,p1,p2) p_05(t, p1, p2); p_05( u, t, p2); p_05( d, u, p2) +// 3/8*p1 + 5/8*p2 = 1/2*(1/2*(1/2*(p1+p2) + p2) + 1/2*(p1+p2)) +#define p_0375(d,p1,p2) p_05(t, p1, p2); p_05( u, t, p2); p_05( d, u, t) +#define p_0625(d,p1,p2) p_0375(d,p2,p1) +#define p_0875(d,p1,p2) p_0125(d,p2,p1) + +/* pixel transforms */ +#define f_pal(v) pal[v] // convert CLUT index -> RGB565 +#define f_nop(v) (v) // source already in dest format (CLUT/RGB) +#define f_or(v) (v|pal) // CLUT, add palette selection + +/* +scalers h: +256->320: - (4:5) (256x224/240 -> 320x224/240) +256->299: - (6:7) (256x224 -> 299x224, alt?) +160->320: - (1:2) 2x (160x144 -> 320x240, GG) +160->288: - (5:9) (160x144 -> 288x216, GG alt?) 
+*/ + +/* scale 4:5 */ +#define h_upscale_nn_4_5(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = w/4; i > 0; i--, si += 4, di += 5) { \ + di[0] = f(si[0]); \ + di[1] = f(si[1]); \ + di[2] = f(si[1]); \ + di[3] = f(si[2]); \ + di[4] = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +// reverse version for overlapping buffers +#define rh_upscale_nn_4_5(di,ds,si,ss,w,f) do { \ + int i; \ + di += w/4*5; \ + si += w; \ + for (i = w/4; i > 0; i--, si -= 4, di -= 5) { \ + di[-1] = f(si[-1]); \ + di[-2] = f(si[-2]); \ + di[-3] = f(si[-3]); \ + di[-4] = f(si[-3]); \ + di[-5] = f(si[-4]); \ + } \ + di += ds; \ + si += ss; \ +} while (0) + +#define h_upscale_snn_4_5(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = w/4; i > 0; i--, si += 4, di += 5) { \ + di[0] = f(si[0]); \ + di[1] = f(si[1]); \ + p_05(di[2], f(si[1]),f(si[2])); \ + di[3] = f(si[2]); \ + di[4] = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +#define h_upscale_bln_4_5(di,ds,si,ss,w,f) do { \ + int i; u16 t; \ + for (i = w/4; i > 0; i--, si += 4, di += 5) { \ + di[0] = f(si[0]); \ + p_025(di[1], f(si[0]),f(si[1])); \ + p_05 (di[2], f(si[1]),f(si[2])); \ + p_075(di[3], f(si[2]),f(si[3])); \ + di[4] = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +#define h_upscale_bl2_4_5(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = w/4; i > 0; i--, si += 4, di += 5) { \ + di[0] = f(si[0]); \ + p_05(di[1], f(si[0]),f(si[1])); \ + p_05(di[2], f(si[1]),f(si[2])); \ + di[3] = f(si[2]); \ + di[4] = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +#define h_upscale_bl4_4_5(di,ds,si,ss,w,f) do { \ + int i; u16 t, p = f(si[0]); \ + for (i = w/4; i > 0; i--, si += 4, di += 5) { \ + p_025(di[0], p, f(si[0])); \ + p_05 (di[1], f(si[0]),f(si[1])); \ + p_05 (di[2], f(si[1]),f(si[2])); \ + p_075(di[3], f(si[2]),f(si[3])); \ + di[4] = p = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +#define 
h_upscale_bl8_4_5(di,ds,si,ss,w,f) do { \ + int i; u16 t, u, p = f(si[0]); \ + for (i = w/4; i > 0; i--, si += 4, di += 5) { \ + p_025 (di[0], p, f(si[0])); \ + p_0375(di[1], f(si[0]),f(si[1])); \ + p_0625(di[2], f(si[1]),f(si[2])); \ + p_075 (di[3], f(si[2]),f(si[3])); \ + di[4] = p = f(si[3]); \ + } \ + di += ds - w/4*5; \ + si += ss - w; \ +} while (0) + +/* scale 6:7 */ +#define h_upscale_nn_6_7(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = w/6; i > 0; i--, si += 6, di += 7) { \ + di[0] = f(si[0]); \ + di[1] = f(si[1]); \ + di[2] = f(si[2]); \ + di[3] = f(si[2]); \ + di[4] = f(si[3]); \ + di[5] = f(si[4]); \ + di[6] = f(si[5]); \ + } \ + di += ds - w/6*7; \ + si += ss - w; \ +} while (0) + +// reverse version for overlapping buffers +#define rh_upscale_nn_6_7(di,ds,si,ss,w,f) do { \ + int i; \ + di += w/6*7; \ + si += w; \ + for (i = w/6; i > 0; i--, si -= 6, di -= 7) { \ + di[-1] = f(si[-1]); \ + di[-2] = f(si[-2]); \ + di[-3] = f(si[-3]); \ + di[-4] = f(si[-4]); \ + di[-5] = f(si[-4]); \ + di[-6] = f(si[-5]); \ + di[-7] = f(si[-6]); \ + } \ + di += ds; \ + si += ss; \ +} while (0) + +#define h_upscale_snn_6_7(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = w/6; i > 0; i--, si += 6, di += 7) { \ + di[0] = f(si[0]); \ + di[1] = f(si[1]); \ + di[2] = f(si[2]); \ + p_05(di[3], f(si[2]),f(si[3])); \ + di[4] = f(si[3]); \ + di[5] = f(si[4]); \ + di[6] = f(si[5]); \ + } \ + di += ds - w/6*7; \ + si += ss - w; \ +} while (0) + +#define h_upscale_bl2_6_7(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = w/6; i > 0; i--, si += 6, di += 7) { \ + di[0] = f(si[0]); \ + p_05(di[1], f(si[0]),f(si[1])); \ + p_05(di[2], f(si[1]),f(si[2])); \ + p_05(di[3], f(si[2]),f(si[3])); \ + p_05(di[4], f(si[3]),f(si[4])); \ + di[5] = f(si[4]); \ + di[6] = f(si[5]); \ + } \ + di += ds - w/6*7; \ + si += ss - w; \ +} while (0) + +#define h_upscale_bl4_6_7(di,ds,si,ss,w,f) do { \ + int i; u16 t p = f(si[0]); \ + for (i = w/6; i > 0; i--, si += 6, di += 7) { \ + p_025(di[0], p, f(si[0])); \ + 
p_025(di[1], f(si[0]),f(si[1])); \ + p_05 (di[2], f(si[1]),f(si[2])); \ + p_05 (di[3], f(si[2]),f(si[3])); \ + p_075(di[4], f(si[3]),f(si[4])); \ + p_075(di[5], f(si[4]),f(si[5])); \ + di[6] = p = f(si[5]); \ + } \ + di += ds - w/6*7; \ + si += ss - w; \ +} while (0) + +/* scale 5:9 */ +#define h_upscale_nn_5_9(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = w/5; i > 0; i--, si += 5, di += 9) { \ + di[0] = f(si[0]); \ + di[1] = f(si[0]); \ + di[2] = f(si[1]); \ + di[3] = f(si[1]); \ + di[4] = f(si[2]); \ + di[5] = f(si[3]); \ + di[6] = f(si[3]); \ + di[7] = f(si[4]); \ + di[8] = f(si[4]); \ + } \ + di += ds - w/5*9; \ + si += ss - w; \ +} while (0) + +#define h_upscale_snn_5_9(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = w/5; i > 0; i--, si += 5, di += 9) { \ + di[0] = f(si[0]); \ + di[1] = f(si[0]); \ + di[2] = f(si[1]); \ + p_05(di[3], f(si[1]),f(si[2])); \ + di[4] = f(si[2]); \ + p_05(di[5], f(si[2]),f(si[3])); \ + di[6] = f(si[3]); \ + di[7] = f(si[4]); \ + di[8] = f(si[4]); \ + } \ + di += ds - w/5*9; \ + si += ss - w; \ +} while (0) + +#define h_upscale_bl2_5_9(di,ds,si,ss,w,f) do { \ + int i; \ + for (i = w/5; i > 0; i--, si += 5, di += 9) { \ + di[0] = f(si[0]); \ + p_05(di[1], f(si[0]),f(si[1])); \ + di[2] = f(si[1]); \ + p_05(di[3], f(si[1]),f(si[2])); \ + di[4] = f(si[2]); \ + p_05(di[5], f(si[2]),f(si[3])); \ + di[6] = f(si[3]); \ + p_05(di[7], f(si[3]),f(si[4])); \ + di[8] = f(si[4]); \ + } \ + di += ds - w/5*9; \ + si += ss - w; \ +} while (0) + +#define h_upscale_bl4_5_9(di,ds,si,ss,w,f) do { \ + int i; u16 t, p = f(si[0]); \ + for (i = w/5; i > 0; i--, si += 5, di += 9) { \ + p_05 (di[0], p, f(si[0])); \ + di[1] = f(si[0]); \ + p_025(di[2], f(si[0]),f(si[1])); \ + p_075(di[3], f(si[1]),f(si[2])); \ + p_025(di[4], f(si[1]),f(si[2])); \ + p_075(di[5], f(si[2]),f(si[3])); \ + di[6] = f(si[3]); \ + p_05 (di[7], f(si[3]),f(si[4])); \ + di[8] = p = f(si[4]); \ + } \ + di += ds - w/5*9; \ + si += ss - w; \ +} while (0) + +/* scale 1:2 integer scale */ 
+#define h_upscale_nn_1_2(di,ds,si,ss,w,f) do { /* 2 source pixels -> 4 dest pixels per pass; an odd w leaves its last pixel unconverted */ \
+	int i; \
+	for (i = w/2; i > 0; i--, si += 2, di += 4) { \
+		di[0] = f(si[0]); \
+		di[1] = f(si[0]); \
+		di[2] = f(si[1]); \
+		di[3] = f(si[1]); \
+	} \
+	di += ds - w*2; \
+	si += ss - w; \
+} while (0)
+
+// reverse version for overlapping buffers: each row is filled from its last pixel back to its first
+#define rh_upscale_nn_1_2(di,ds,si,ss,w,f) do { \
+	int i; \
+	di += w*2; \
+	si += w; \
+	for (i = w/2; i > 0; i--, si -= 2, di -= 4) { \
+		di[-1] = f(si[-1]); \
+		di[-2] = f(si[-1]); \
+		di[-3] = f(si[-2]); \
+		di[-4] = f(si[-2]); \
+	} \
+	di += ds; \
+	si += ss; \
+} while (0)
+
+#define h_upscale_bl2_1_2(di,ds,si,ss,w,f) do { \
+	int i; uint p = f(si[0]); /* p: previous source pixel, seeded with the row's first pixel */ \
+	for (i = w/2; i > 0; i--, si += 2, di += 4) { \
+		p_05 (di[0], p, f(si[0])); \
+		di[1] = f(si[0]); \
+		p_05 (di[2], f(si[0]),f(si[1])); \
+		di[3] = p = f(si[1]); \
+	} \
+	di += ds - w*2; \
+	si += ss - w; \
+} while (0)
+
+/* scale 1:1, copy; handles 4 pixels per pass, so w is expected to be a multiple of 4 */
+#define h_copy(di,ds,si,ss,w,f) do { \
+	int i; \
+	for (i = w/4; i > 0; i--, si += 4, di += 4) { \
+		di[0] = f(si[0]); \
+		di[1] = f(si[1]); \
+		di[2] = f(si[2]); \
+		di[3] = f(si[3]); \
+	} \
+	di += ds - w; \
+	si += ss - w; \
+} while (0)
+
+/*
+scalers v:
+224->240: - (14:15) (256/320x224 -> 320x240)
+224->238: - (16:17) (256/320x224 -> 320x238 alt?)
+144->240: - (3:5) (160x144 -> 320x240, GG)
+144->216: - (2:3) (160x144 -> 288x216, GG alt?)
+*/ + +#define v_mix(di,li,ri,w,p_mix,f) do { \ + int i; u32 t, u; (void)t, (void)u; \ + for (i = 0; i < w; i += 4) { \ + p_mix((di)[i ], f((li)[i ]),f((ri)[i ])); \ + p_mix((di)[i+1], f((li)[i+1]),f((ri)[i+1])); \ + p_mix((di)[i+2], f((li)[i+2]),f((ri)[i+2])); \ + p_mix((di)[i+3], f((li)[i+3]),f((ri)[i+3])); \ + } \ +} while (0) + +#define v_copy(di,ri,w,f) do { \ + int i; \ + for (i = 0; i < w; i += 4) { \ + (di)[i ] = f((ri)[i ]); \ + (di)[i+1] = f((ri)[i+1]); \ + (di)[i+2] = f((ri)[i+2]); \ + (di)[i+3] = f((ri)[i+3]); \ + } \ +} while (0) + +/* scale 14:15 */ +#define v_upscale_nn_14_15(di,ds,w,l) do { \ + if (++l == 7) { \ + di += ds; \ + } else if (l >= 14) { \ + l = 0; \ + di -= 7*ds; \ + v_copy(&di[0], &di[-ds], w, f_nop); \ + di += 7*ds; \ + } \ +} while (0) + +#define v_upscale_snn_14_15(di,ds,w,l) do { \ + if (++l == 7) { \ + di += ds; \ + } else if (l >= 14) { \ + l = 0; \ + di -= 7*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + v_mix(&di[-ds], &di[-2*ds], &di[-ds], w, p_05, f_nop); \ + v_mix(&di[ ds], &di[ ds], &di[ 2*ds], w, p_05, f_nop); \ + di += 7*ds; \ + } \ +} while (0) + +#define v_upscale_bl2_14_15(di,ds,w,l) do { \ + if (++l == 3) { \ + di += ds; \ + } else if (l >= 14) { \ + int j; \ + l = 0; \ + di -= 11*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + for (j = 0; j < 7; j++) { \ + di += ds; \ + v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \ + } \ + di += 4*ds; \ + } \ +} while (0) + +#define v_upscale_bl4_14_15(di,ds,w,l) do { \ + if (++l == 1) { \ + di += ds; \ + } else if (l >= 14) { \ + int j; \ + l = 0; \ + di -= 13*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_025, f_nop); \ + di += ds; \ + for (j = 0; j < 3; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_025, f_nop); \ + di += ds; \ + } \ + for (j = 0; j < 4; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \ + di += ds; \ + } \ + for (j = 0; j < 4; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_075, f_nop); \ + di += ds; \ + } \ + di += 1*ds; 
\ + } \ +} while (0) + +/* scale 16:17 */ +#define v_upscale_nn_16_17(di,ds,w,l) do { \ + if (++l == 8) { \ + di += ds; \ + } else if (l >= 16) { \ + l = 0; \ + di -= 8*ds; \ + v_copy(&di[0], &di[-ds], w, f_nop); \ + di += 8*ds; \ + } \ +} while (0) + +#define v_upscale_snn_16_17(di,ds,w,l) do { \ + if (++l == 8) { \ + di += ds; \ + } else if (l >= 16) { \ + l = 0; \ + di -= 8*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + v_mix(&di[-ds], &di[-2*ds], &di[-ds], w, p_05, f_nop); \ + v_mix(&di[ ds], &di[ ds], &di[ 2*ds], w, p_05, f_nop); \ + di += 8*ds; \ + } \ +} while (0) + +#define v_upscale_bl2_16_17(di,ds,w,l) do { \ + if (++l == 4) { \ + di += ds; \ + } else if (l >= 16) { \ + int j; \ + l = 0; \ + di -= 12*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + for (j = 0; j < 7; j++) { \ + di += ds; \ + v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \ + } \ + di += 5*ds; \ + } \ +} while (0) + +#define v_upscale_bl4_16_17(di,ds,w,l) do { \ + if (++l == 2) { \ + di += ds; \ + } else if (l >= 16) { \ + int j; \ + l = 0; \ + di -= 14*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_025, f_nop); \ + di += ds; \ + for (j = 0; j < 3; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_025, f_nop); \ + di += ds; \ + } \ + for (j = 0; j < 4; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_05, f_nop); \ + di += ds; \ + } \ + for (j = 0; j < 4; j++) { \ + v_mix(&di[0], &di[0], &di[ds], w, p_075, f_nop); \ + di += ds; \ + } \ + di += 2*ds; \ + } \ +} while (0) + +/* scale 3:5 */ +#define v_upscale_nn_3_5(di,ds,w,l) do { \ + if (++l < 3) { \ + di += ds; \ + } else { \ + int j; \ + l = 0; \ + di -= 3*ds; \ + for (j = 0; j < 2; j++) { \ + v_copy(&di[0], &di[-ds], w, f_nop); \ + di += 2*ds; \ + } \ + di -= ds; \ + } \ +} while (0) + +#define v_upscale_snn_3_5(di,ds,w,l) do { \ + if (++l < 3) { \ + di += ds; \ + } else { \ + int j; \ + l = 0; \ + di -= 3*ds; \ + for (j = 0; j < 2; j++) { \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + di += 2*ds; \ + } 
\ + di -= ds; \ + } \ +} while (0) + +/* scale 2:3 */ +#define v_upscale_nn_2_3(di,ds,w,l) do { \ + if (++l < 2) { \ + di += ds; \ + } else { \ + int j; \ + l = 0; \ + di -= 2*ds; \ + v_copy(&di[0], &di[-ds], w, f_nop); \ + di += 2*ds; \ + } \ +} while (0) + +#define v_upscale_snn_2_3(di,ds,w,l) do { \ + if (++l < 2) { \ + di += ds; \ + } else { \ + int j; \ + l = 0; \ + di -= 2*ds; \ + v_mix(&di[0], &di[-ds], &di[ds], w, p_05, f_nop); \ + di += 2*ds; \ + } \ +} while (0) + + +/* exponentially smoothing (for LCD ghosting): y[n] = x[n]*a + y[n-1]*(1-a) */ + +#define PXLSBn (PXLSB*15) // using 4 LSBs of each subpixel for subtracting +// NB implement rounding to x[n] by adding 1 to counter round down if y[n] is +// smaller than x[n]: use some of the lower bits to implement subtraction on +// subpixels, with an additional bit to detect borrow, then add the borrow. +// It's doing the increment wrongly in a lot of cases, which doesn't matter +// much since it will converge to x[n] in a few frames anyway if x[n] is static +#define p_05_round(d,p1,p2) \ + p_05(u, p1, p2); \ + t=(u|~PXLSBn)-(p1&PXLSBn); d = u+(~(t>>4)&PXLSB) +// Unfortunately this won't work for p_025, where adding 1 isn't enough and +// adding 2 would be too much, so offer only p_075 here +#define p_075_round(d,p1,p2) \ + p_075(u, p1, p2); \ + t=(u|~PXLSBn)-(p1&PXLSBn); d = u+(~(t>>4)&PXLSB) + +// this is essentially v_mix and v_copy combined +#define v_blend(di,ri,w,p_mix) do { \ + int i; u32 t, u; (void)t, (void)u; \ + for (i = 0; i < w; i += 4) { \ + p_mix((ri)[i ], (di)[i ],(ri)[i ]); (di)[i ] = (ri)[i ]; \ + p_mix((ri)[i+1], (di)[i+1],(ri)[i+1]); (di)[i+1] = (ri)[i+1]; \ + p_mix((ri)[i+2], (di)[i+2],(ri)[i+2]); (di)[i+2] = (ri)[i+2]; \ + p_mix((ri)[i+3], (di)[i+3],(ri)[i+3]); (di)[i+3] = (ri)[i+3]; \ + } \ +} while (0) + + +/* X x Y -> X*5/4 x Y, for X 256->320 */ +void upscale_rgb_nn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void 
upscale_rgb_snn_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl4_x_4_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); + +/* X x Y -> X x Y*17/16, for Y 224->238 or 192->204 (SMS) */ +void upscale_rgb_nn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_snn_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl4_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); + +/* X x Y -> X*5/4 x Y*17/16 */ +void upscale_rgb_nn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_snn_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl4_x_4_5_y_16_17(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); + +/* X x Y -> X*2/1 x Y, e.g. for X 160->320 (GG) */ +void upscale_rgb_nn_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_x_1_2(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); + +/* X x Y -> X x Y*5/3, e.g. 
for Y 144->240 (GG) */ +void upscale_rgb_nn_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl4_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); + +/* X x Y -> X*2/1 x Y*5/3 (GG) */ +void upscale_rgb_nn_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl2_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); +void upscale_rgb_bl4_x_1_2_y_3_5(u16 *__restrict di, int ds, u8 *__restrict si, int ss, int width, int height, u16 *pal); + diff --git a/platform/common/version.h b/platform/common/version.h index ec86a564..ca33f521 100644 --- a/platform/common/version.h +++ b/platform/common/version.h @@ -1 +1 @@ -#define VERSION "1.90" +#define VERSION "" \ No newline at end of file diff --git a/platform/gizmondo/Makefile b/platform/gizmondo/Makefile index d7aab76a..31530d79 100644 --- a/platform/gizmondo/Makefile +++ b/platform/gizmondo/Makefile @@ -7,7 +7,6 @@ asm_memory = 1 asm_render = 1 asm_ym2612 = 1 asm_misc = 1 -asm_cdpico = 1 asm_cdmemory = 1 amalgamate = 0 #profile = 1 @@ -65,6 +64,7 @@ OBJS += pico/sound/sound.o endif OBJS += pico/sound/mix_asm.o OBJS += pico/sound/sn76496.o pico/sound/ym2612.o +OBJS += pico/sound/emu2413/emu2413.o # zlib OBJS += zlib/gzio.o zlib/inffast.o zlib/inflate.o zlib/inftrees.o zlib/trees.o \ zlib/deflate.o zlib/crc32.o zlib/adler32.o zlib/zutil.o zlib/compress.o @@ -99,6 +99,9 @@ ifeq "$(profile)" "1" endif +../../tools/textfilter: ../../tools/textfilter.c + make -C ../../tools/ textfilter + readme.txt: ../../tools/textfilter ../base_readme.txt ../../tools/textfilter ../base_readme.txt $@ GIZ diff --git a/platform/gizmondo/emu.c b/platform/gizmondo/emu.c index 3115ebd3..83f8cee1 100644 --- 
a/platform/gizmondo/emu.c +++ b/platform/gizmondo/emu.c @@ -8,9 +8,9 @@ #include "kgsdk/Framework2D.h" #include "kgsdk/FrameworkAudio.h" #include "../common/emu.h" -#include "../common/lprintf.h" #include "../common/arm_utils.h" #include "../common/config.h" +#include "../libpicofe/lprintf.h" #include "emu.h" #include "menu.h" #include "giz.h" @@ -94,7 +94,7 @@ void pemu_prep_defconfig(void) static int EmuScanBegin16(unsigned int num) { - DrawLineDest = (unsigned short *) giz_screen + 321 * num; + Pico.est.DrawLineDest = (unsigned short *) giz_screen + 321 * num; if ((currentConfig.EmuOpt&0x4000) && (num&1) == 0) // (Pico.m.frame_count&1)) return 1; // skip next line @@ -105,7 +105,7 @@ static int EmuScanBegin16(unsigned int num) static int EmuScanBegin8(unsigned int num) { // draw like the fast renderer - HighCol = gfx_buffer + 328 * num; + Pico.est.HighCol = gfx_buffer + 328 * num; return 0; } @@ -122,13 +122,6 @@ static void osd_text(int x, int y, const char *text) emu_text_out16(x, y, text); } -/* -void log1(void *p1, void *p2) -{ - lprintf("%p %p %p\n", p1, p2, DrawLineDest); -} -*/ - static void cd_leds(void) { static int old_reg = 0; @@ -152,7 +145,7 @@ static void blit(const char *fps, const char *notice) { int emu_opt = currentConfig.EmuOpt; - if (PicoOpt&0x10) + if (PicoIn.opt&0x10) { int lines_flags = 224; // 8bit fast renderer @@ -161,40 +154,43 @@ static void blit(const char *fps, const char *notice) vidConvCpyRGB565(localPal, Pico.cram, 0x40); } // a hack for VR - if (PicoAHW & PAHW_SVP) - memset32((int *)(PicoDraw2FB+328*8+328*223), 0xe0e0e0e0, 328); + if (PicoIn.AHW & PAHW_SVP) + memset((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328*4); if (!(Pico.video.reg[12]&1)) lines_flags|=0x10000; if (currentConfig.EmuOpt&0x4000) lines_flags|=0x40000; // (Pico.m.frame_count&1)?0x20000:0x40000; - vidCpy8to16((unsigned short *)giz_screen+321*8, PicoDraw2FB+328*8, localPal, lines_flags); + vidCpy8to16((unsigned short *)giz_screen+321*8, 
Pico.est.Draw2FB+328*8, localPal, lines_flags); } else if (!(emu_opt&0x80)) { int lines_flags; // 8bit accurate renderer if (Pico.m.dirtyPal) { - Pico.m.dirtyPal = 0; - vidConvCpyRGB565(localPal, Pico.cram, 0x40); + if (Pico.m.dirtyPal == 2) + Pico.m.dirtyPal = 0; + /* no support + switch (Pico.est.SonicPalCount) { + case 3: vidConvCpyRGB565(localPal+0xc0, Pico.est.SonicPal+0xc0, 0x40); + case 2: vidConvCpyRGB565(localPal+0x80, Pico.est.SonicPal+0x80, 0x40); + case 1: vidConvCpyRGB565(localPal+0x40, Pico.est.SonicPal+0x40, 0x40); + default://vidConvCpyRGB565(localPal, Pico.est.SonicPal, 0x40); + } */ + vidConvCpyRGB565(localPal, Pico.est.SonicPal, 0x40); if (Pico.video.reg[0xC]&8) { // shadow/hilight mode - //vidConvCpyRGB32sh(localPal+0x40, Pico.cram, 0x40); - //vidConvCpyRGB32hi(localPal+0x80, Pico.cram, 0x40); // TODO? - memcpy32((void *)(localPal+0xc0), (void *)(localPal+0x40), 0x40*2/4); + //vidConvCpyRGB32sh(localPal+0x40, Pico.est.SonicPal, 0x40); + //vidConvCpyRGB32hi(localPal+0x80, Pico.est.SonicPal, 0x40); // TODO? + memcpy((void *)(localPal+0xc0), (void *)(localPal+0x40), 0x40*2); localPal[0xc0] = 0x0600; localPal[0xd0] = 0xc000; localPal[0xe0] = 0x0000; // reserved pixels for OSD localPal[0xf0] = 0xffff; } - /* no support - else if (rendstatus & 0x20) { // mid-frame palette changes - vidConvCpyRGB565(localPal+0x40, HighPal, 0x40); - vidConvCpyRGB565(localPal+0x80, HighPal+0x40, 0x40); - } */ } lines_flags = (Pico.video.reg[1]&8) ? 
240 : 224; if (!(Pico.video.reg[12]&1)) lines_flags|=0x10000; if (currentConfig.EmuOpt&0x4000) lines_flags|=0x40000; // (Pico.m.frame_count&1)?0x20000:0x40000; - vidCpy8to16((unsigned short *)giz_screen+321*8, PicoDraw2FB+328*8, localPal, lines_flags); + vidCpy8to16((unsigned short *)giz_screen+321*8, Pico.est.Draw2FB+328*8, localPal, lines_flags); } if (notice || (emu_opt & 2)) { @@ -203,7 +199,7 @@ static void blit(const char *fps, const char *notice) if (emu_opt & 2) osd_text(OSD_FPS_X, h, fps); } - if ((emu_opt & 0x400) && (PicoAHW & PAHW_MCD)) + if ((emu_opt & 0x400) && (PicoIn.AHW & PAHW_MCD)) cd_leds(); } @@ -227,7 +223,7 @@ static void vidResetMode(void) { giz_screen = fb_lock(1); - if (PicoOpt&0x10) { + if (PicoIn.opt&0x10) { } else if (currentConfig.EmuOpt&0x80) { PicoDrawSetOutFormat(PDF_RGB555, 0); PicoDrawSetCallbacks(EmuScanBegin16, NULL); @@ -235,7 +231,7 @@ static void vidResetMode(void) PicoDrawSetOutFormat(PDF_NONE, 0); PicoDrawSetCallbacks(EmuScanBegin8, NULL); } - if ((PicoOpt&0x10) || !(currentConfig.EmuOpt&0x80)) { + if ((PicoIn.opt&0x10) || !(currentConfig.EmuOpt&0x80)) { // setup pal for 8-bit modes localPal[0xc0] = 0x0600; localPal[0xd0] = 0xc000; @@ -273,33 +269,33 @@ static void stdbg(const char *fmt, ...) 
static void updateSound(int len) { snd_all_samples += len / 2; - PsndOut += len / 2; - if (PsndOut - snd_cbuff >= snd_cbuf_samples) + PicoIn.sndOut += len / 2; + if (PicoIn.sndOut - snd_cbuff >= snd_cbuf_samples) { - //if (PsndOut - snd_cbuff != snd_cbuf_samples) - // stdbg("snd diff is %i, not %i", PsndOut - snd_cbuff, snd_cbuf_samples); - PsndOut = snd_cbuff; + //if (PicoIn.sndOut - snd_cbuff != snd_cbuf_samples) + // stdbg("snd diff is %i, not %i", PicoIn.sndOut - snd_cbuff, snd_cbuf_samples); + PicoIn.sndOut = snd_cbuff; } } static void SkipFrame(void) { - PicoSkipFrame=1; + PicoIn.skipFrame=1; PicoFrame(); - PicoSkipFrame=0; + PicoIn.skipFrame=0; } /* forced frame to front buffer */ void pemu_forced_frame(int no_scale, int do_emu) { - int po_old = PicoOpt; + int po_old = PicoIn.opt; int eo_old = currentConfig.EmuOpt; - PicoOpt &= ~0x10; - PicoOpt |= POPT_ACC_SPRITES; - if (!no_scale) - PicoOpt |= POPT_EN_SOFTSCALE; + PicoIn.opt &= ~0x10; + PicoIn.opt |= POPT_ACC_SPRITES; + if (!no_scale && currentConfig.scaling) + PicoIn.opt |= POPT_EN_SOFTSCALE; currentConfig.EmuOpt |= 0x80; if (giz_screen == NULL) @@ -313,7 +309,7 @@ void pemu_forced_frame(int no_scale, int do_emu) fb_unlock(); giz_screen = NULL; - PicoOpt = po_old; + PicoIn.opt = po_old; currentConfig.EmuOpt = eo_old; } @@ -324,7 +320,7 @@ static void RunEvents(unsigned int which) { int do_it = 1; - if (PsndOut != NULL) + if (PicoIn.sndOut != NULL) FrameworkAudio_SetPause(1); if (giz_screen == NULL) giz_screen = fb_lock(1); @@ -351,18 +347,18 @@ static void RunEvents(unsigned int which) Sleep(0); } - if (PsndOut != NULL) + if (PicoIn.sndOut != NULL) FrameworkAudio_SetPause(0); reset_timing = 1; } if (which & 0x0400) // switch renderer { - if (PicoOpt&0x10) { PicoOpt&=~0x10; currentConfig.EmuOpt |= 0x80; } - else { PicoOpt|= 0x10; currentConfig.EmuOpt &= ~0x80; } + if (PicoIn.opt&0x10) { PicoIn.opt&=~0x10; currentConfig.EmuOpt |= 0x80; } + else { PicoIn.opt|= 0x10; currentConfig.EmuOpt &= ~0x80; } 
vidResetMode(); - if (PicoOpt&0x10) { + if (PicoIn.opt&0x10) { strcpy(noticeMsg, " 8bit fast renderer"); } else if (currentConfig.EmuOpt&0x80) { strcpy(noticeMsg, "16bit accurate renderer"); @@ -399,16 +395,16 @@ static void updateKeys(void) keys &= CONFIGURABLE_KEYS; - PicoPad[0] = allActions[0] & 0xfff; - PicoPad[1] = allActions[1] & 0xfff; + PicoIn.pad[0] = allActions[0] & 0xfff; + PicoIn.pad[1] = allActions[1] & 0xfff; - if (allActions[0] & 0x7000) emu_DoTurbo(&PicoPad[0], allActions[0]); - if (allActions[1] & 0x7000) emu_DoTurbo(&PicoPad[1], allActions[1]); + if (allActions[0] & 0x7000) emu_DoTurbo(&PicoIn.pad[0], allActions[0]); + if (allActions[1] & 0x7000) emu_DoTurbo(&PicoIn.pad[1], allActions[1]); events = (allActions[0] | allActions[1]) >> 16; // volume is treated in special way and triggered every frame - if ((events & 0x6000) && PsndOut != NULL) + if ((events & 0x6000) && PicoIn.sndOut != NULL) { int vol = currentConfig.volume; if (events & 0x2000) { @@ -462,8 +458,8 @@ void pemu_loop(void) // make sure we are in correct mode vidResetMode(); - if (currentConfig.scaling) PicoOpt|=0x4000; - else PicoOpt&=~0x4000; + if (currentConfig.scaling) PicoIn.opt|=0x4000; + else PicoIn.opt&=~0x4000; Pico.m.dirtyPal = 1; oldmodes = ((Pico.video.reg[12]&1)<<2) ^ 0xc; @@ -473,22 +469,22 @@ void pemu_loop(void) reset_timing = 1; // prepare CD buffer - if (PicoAHW & PAHW_MCD) PicoCDBufferInit(); + if (PicoIn.AHW & PAHW_MCD) PicoCDBufferInit(); // prepare sound stuff - PsndOut = NULL; + PicoIn.sndOut = NULL; if (currentConfig.EmuOpt & 4) { int ret, snd_excess_add, stereo; - if (PsndRate != PsndRate_old || (PicoOpt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) { + if (PicoIn.sndRate != PsndRate_old || (PicoIn.opt&0x0b) != (PicoOpt_old&0x0b) || Pico.m.pal != pal_old) { PsndRerate(Pico.m.frame_count ? 
1 : 0); } - stereo=(PicoOpt&8)>>3; - snd_excess_add = ((PsndRate - PsndLen*target_fps)<<16) / target_fps; - snd_cbuf_samples = (PsndRate<>3; + snd_excess_add = ((PicoIn.sndRate - Pico.snd.len*target_fps)<<16) / target_fps; + snd_cbuf_samples = (PicoIn.sndRate< 22050) co = 11; - if (PicoOpt&8) shift++; + if (PicoIn.sndRate == 22050) co = 10; + if (PicoIn.sndRate > 22050) co = 11; + if (PicoIn.opt&8) shift++; if (audio_skew < 0) { adj = -((-audio_skew) >> shift); if (audio_skew > -(6<>=1; @@ -607,7 +603,7 @@ void pemu_loop(void) for (i = 0; i < currentConfig.Frameskip; i++) { updateKeys(); SkipFrame(); frames_done++; - if (PsndOut) { // do framelimitting if sound is enabled + if (PicoIn.sndOut) { // do framelimitting if sound is enabled int tval_diff; tval = GetTickCount(); tval_diff = (int)(tval - tval_thissec) << 8; @@ -667,7 +663,7 @@ void pemu_loop(void) if (currentConfig.Frameskip < 0 && tval_diff - lim_time >= (300<<8)) // slowdown detection reset_timing = 1; - else if (PsndOut != NULL || currentConfig.Frameskip < 0) + else if (PicoIn.sndOut != NULL || currentConfig.Frameskip < 0) { // sleep if we are still too fast if (tval_diff < lim_time) @@ -681,10 +677,10 @@ void pemu_loop(void) } - if (PicoAHW & PAHW_MCD) PicoCDBufferFree(); + if (PicoIn.AHW & PAHW_MCD) PicoCDBufferFree(); - if (PsndOut != NULL) { - PsndOut = snd_cbuff = NULL; + if (PicoIn.sndOut != NULL) { + PicoIn.sndOut = snd_cbuff = NULL; FrameworkAudio_Close(); } diff --git a/platform/gizmondo/giz.c b/platform/gizmondo/giz.c index 90273888..93f2c341 100644 --- a/platform/gizmondo/giz.c +++ b/platform/gizmondo/giz.c @@ -11,7 +11,7 @@ #include "kgsdk/Framework.h" #include "kgsdk/Framework2D.h" #include "giz.h" -#include "version.h" +#include "../common/version.h" #define LOG_FILE "log.log" diff --git a/platform/gizmondo/main.c b/platform/gizmondo/main.c index 562b7ade..a53e901c 100644 --- a/platform/gizmondo/main.c +++ b/platform/gizmondo/main.c @@ -14,7 +14,7 @@ #include "../common/menu.h" #include 
"../common/emu.h" #include "../common/config.h" -#include "version.h" +#include "../common/version.h" int APIENTRY WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPWSTR lpCmdLine, int nCmdShow) diff --git a/platform/gizmondo/menu.c b/platform/gizmondo/menu.c index d4f6bd49..cf1f5b48 100644 --- a/platform/gizmondo/menu.c +++ b/platform/gizmondo/menu.c @@ -28,11 +28,11 @@ #include "../common/emu.h" #include "../common/readpng.h" #include "../common/input.h" -#include "version.h" +#include "../common/version.h" #include #include -#include +#include #define gizKeyUnkn "???" @@ -49,12 +49,10 @@ unsigned char *menu_screen = gfx_buffer; /* draw here and blit later, to avoid f void menu_darken_bg(void *dst, const void *src, int pixels, int darker); static void menu_prepare_bg(int use_game_bg); -static unsigned int inp_prev = 0; - void menu_draw_begin(int use_bgbuff) { if (use_bgbuff) - memcpy32((int *)menu_screen, (int *)bg_buffer, 321*240*2/4); + memcpy((int *)menu_screen, (int *)bg_buffer, 321*240*2); } @@ -66,7 +64,7 @@ void menu_draw_end(void) lprintf("%s: Framework2D_LockBuffer() returned NULL\n", __FUNCTION__); return; } - memcpy32(giz_screen, (int *)menu_screen, 321*240*2/4); + memcpy(giz_screen, (int *)menu_screen, 321*240*2); fb_unlock(); giz_screen = NULL; fb_flip(); @@ -501,7 +499,7 @@ static void draw_savestate_bg(int slot) } if (file) { - if (PicoAHW & 1) { + if (PicoIn.AHW & 1) { PicoCdLoadStateGfx(file); } else { areaSeek(file, 0x10020, SEEK_SET); // skip header and RAM in state file @@ -703,7 +701,7 @@ menu_entry ctrlopt_entries[] = { "Player 1", MB_NONE, MA_CTRL_PLAYER1, NULL, 0, 0, 0, 1, 0 }, { "Player 2", MB_NONE, MA_CTRL_PLAYER2, NULL, 0, 0, 0, 1, 0 }, { "Emulator controls", MB_NONE, MA_CTRL_EMU, NULL, 0, 0, 0, 1, 0 }, - { "6 button pad", MB_ONOFF, MA_OPT_6BUTTON_PAD, &PicoOpt, 0x020, 0, 0, 1, 1 }, + { "6 button pad", MB_ONOFF, MA_OPT_6BUTTON_PAD, &PicoIn.opt, 0x020, 0, 0, 1, 1 }, { "Turbo rate", MB_RANGE, MA_CTRL_TURBO_RATE, ¤tConfig.turbo_rate, 
0, 1, 30, 1, 1 }, { "Done", MB_NONE, MA_CTRL_DONE, NULL, 0, 0, 0, 1, 0 }, }; @@ -756,7 +754,7 @@ static void kc_sel_loop(void) if (inp & PBTN_UP ) { menu_sel--; if (menu_sel < 0) menu_sel = menu_sel_max; } if (inp & PBTN_DOWN) { menu_sel++; if (menu_sel > menu_sel_max) menu_sel = 0; } if (inp & PBTN_PLAY) { - int is_6button = PicoOpt & 0x020; + int is_6button = PicoIn.opt & 0x020; switch (selected_id) { case MA_CTRL_PLAYER1: key_config_loop(me_ctrl_actions, is_6button ? 15 : 11, 0); return; case MA_CTRL_PLAYER2: key_config_loop(me_ctrl_actions, is_6button ? 15 : 11, 1); return; @@ -779,12 +777,12 @@ menu_entry cdopt_entries[] = { NULL, MB_NONE, MA_CDOPT_TESTBIOS_EUR, NULL, 0, 0, 0, 1, 0 }, { NULL, MB_NONE, MA_CDOPT_TESTBIOS_JAP, NULL, 0, 0, 0, 1, 0 }, { "CD LEDs", MB_ONOFF, MA_CDOPT_LEDS, ¤tConfig.EmuOpt, 0x0400, 0, 0, 1, 1 }, - { "CDDA audio", MB_ONOFF, MA_CDOPT_CDDA, &PicoOpt, 0x0800, 0, 0, 1, 1 }, - { "PCM audio", MB_ONOFF, MA_CDOPT_PCM, &PicoOpt, 0x0400, 0, 0, 1, 1 }, + { "CDDA audio", MB_ONOFF, MA_CDOPT_CDDA, &PicoIn.opt, 0x0800, 0, 0, 1, 1 }, + { "PCM audio", MB_ONOFF, MA_CDOPT_PCM, &PicoIn.opt, 0x0400, 0, 0, 1, 1 }, { NULL, MB_NONE, MA_CDOPT_READAHEAD, NULL, 0, 0, 0, 1, 1 }, - { "SaveRAM cart", MB_ONOFF, MA_CDOPT_SAVERAM, &PicoOpt, 0x8000, 0, 0, 1, 1 }, - { "Scale/Rot. fx (slow)", MB_ONOFF, MA_CDOPT_SCALEROT_CHIP,&PicoOpt, 0x1000, 0, 0, 1, 1 }, - { "Better sync (slow)", MB_ONOFF, MA_CDOPT_BETTER_SYNC, &PicoOpt, 0x2000, 0, 0, 1, 1 }, + { "SaveRAM cart", MB_ONOFF, MA_CDOPT_SAVERAM, &PicoIn.opt, 0x8000, 0, 0, 1, 1 }, + { "Scale/Rot. 
fx (slow)", MB_ONOFF, MA_CDOPT_SCALEROT_CHIP,&PicoIn.opt, 0x1000, 0, 0, 1, 1 }, + { "Better sync (slow)", MB_ONOFF, MA_CDOPT_BETTER_SYNC, &PicoIn.opt, 0x2000, 0, 0, 1, 1 }, { "done", MB_NONE, MA_CDOPT_DONE, NULL, 0, 0, 0, 1, 0 }, }; @@ -927,16 +925,17 @@ static void cd_menu_loop_options(void) menu_entry opt2_entries[] = { - { "Disable sprite limit", MB_ONOFF, MA_OPT2_NO_SPRITE_LIM, &PicoOpt, 0x40000, 0, 0, 1, 1 }, - { "Emulate Z80", MB_ONOFF, MA_OPT2_ENABLE_Z80, &PicoOpt, 0x00004, 0, 0, 1, 1 }, - { "Emulate YM2612 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2612, &PicoOpt, 0x00001, 0, 0, 1, 1 }, - { "Emulate SN76496 (PSG)", MB_ONOFF, MA_OPT2_ENABLE_SN76496,&PicoOpt, 0x00002, 0, 0, 1, 1 }, + { "Disable sprite limit", MB_ONOFF, MA_OPT2_NO_SPRITE_LIM, &PicoIn.opt, 0x40000, 0, 0, 1, 1 }, + { "Emulate Z80", MB_ONOFF, MA_OPT2_ENABLE_Z80, &PicoIn.opt, 0x00004, 0, 0, 1, 1 }, + { "Emulate YM2612 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2612, &PicoIn.opt, 0x00001, 0, 0, 1, 1 }, + { "Emulate SN76496 (PSG)", MB_ONOFF, MA_OPT2_ENABLE_SN76496,&PicoIn.opt, 0x00002, 0, 0, 1, 1 }, + { "Emulate YM2413 (FM)", MB_ONOFF, MA_OPT2_ENABLE_YM2413, &PicoIn.opt, 0x00020, 0, 0, 1, 1 }, { "Double buffering", MB_ONOFF, MA_OPT2_DBLBUFF, ¤tConfig.EmuOpt, 0x8000, 0, 0, 1, 1 }, { "Wait for V-sync (slow)", MB_ONOFF, MA_OPT2_VSYNC, ¤tConfig.EmuOpt, 0x2000, 0, 0, 1, 1 }, { "gzip savestates", MB_ONOFF, MA_OPT2_GZIP_STATES, ¤tConfig.EmuOpt, 0x0008, 0, 0, 1, 1 }, { "Don't save last used ROM", MB_ONOFF, MA_OPT2_NO_LAST_ROM, ¤tConfig.EmuOpt, 0x0020, 0, 0, 1, 1 }, - { "SVP dynarec", MB_ONOFF, MA_OPT2_SVP_DYNAREC, &PicoOpt, 0x20000, 0, 0, 1, 1 }, - { "Disable idle loop patching",MB_ONOFF, MA_OPT2_NO_IDLE_LOOPS, &PicoOpt, 0x80000, 0, 0, 1, 1 }, + { "SVP dynarec", MB_ONOFF, MA_OPT2_SVP_DYNAREC, &PicoIn.opt, 0x20000, 0, 0, 1, 1 }, + { "Disable idle loop patching",MB_ONOFF, MA_OPT2_NO_IDLE_LOOPS, &PicoIn.opt, 0x80000, 0, 0, 1, 1 }, { "done", MB_NONE, MA_OPT2_DONE, NULL, 0, 0, 0, 1, 0 }, }; @@ -1001,7 +1000,7 @@ static void 
amenu_loop_options(void) menu_entry opt_entries[] = { { NULL, MB_NONE, MA_OPT_RENDERER, NULL, 0, 0, 0, 1, 1 }, - { "Accurate sprites", MB_ONOFF, MA_OPT_ACC_SPRITES, &PicoOpt, 0x080, 0, 0, 0, 1 }, + { "Accurate sprites", MB_ONOFF, MA_OPT_ACC_SPRITES, &PicoIn.opt, 0x080, 0, 0, 0, 1 }, { "Scanline mode (faster)", MB_ONOFF, MA_OPT_INTERLACED, ¤tConfig.EmuOpt, 0x4000, 0, 0, 1, 1 }, { "Scale low res mode", MB_ONOFF, MA_OPT_SCALING, ¤tConfig.scaling, 0x0001, 0, 3, 1, 1 }, { "Show FPS", MB_ONOFF, MA_OPT_SHOW_FPS, ¤tConfig.EmuOpt, 0x0002, 0, 0, 1, 1 }, @@ -1030,7 +1029,7 @@ static void menu_opt_cust_draw(const menu_entry *entry, int x, int y, void *para switch (entry->id) { case MA_OPT_RENDERER: - if (PicoOpt&0x10) + if (PicoIn.opt&0x10) str = " 8bit fast"; else if (currentConfig.EmuOpt&0x80) str = "16bit accurate"; @@ -1045,11 +1044,11 @@ static void menu_opt_cust_draw(const menu_entry *entry, int x, int y, void *para text_out16(x, y, "Frameskip %s", str24); break; case MA_OPT_SOUND_QUALITY: - str = (PicoOpt&0x08)?"stereo":"mono"; - text_out16(x, y, "Sound Quality: %5iHz %s", PsndRate, str); + str = (PicoIn.opt&0x08)?"stereo":"mono"; + text_out16(x, y, "Sound Quality: %5iHz %s", PicoIn.sndRate, str); break; case MA_OPT_REGION: - text_out16(x, y, "Region: %s", me_region_name(PicoRegionOverride, PicoAutoRgnOrder)); + text_out16(x, y, "Region: %s", me_region_name(PicoIn.regionOverride, PicoIn.autoRgnOrder)); break; case MA_OPT_CONFIRM_STATES: switch ((currentConfig.EmuOpt >> 9) & 5) { @@ -1108,31 +1107,31 @@ static void region_prevnext(int right) static int rgn_orders[] = { 0x148, 0x184, 0x814, 0x418, 0x841, 0x481 }; int i; if (right) { - if (!PicoRegionOverride) { + if (!PicoIn.regionOverride) { for (i = 0; i < 6; i++) - if (rgn_orders[i] == PicoAutoRgnOrder) break; - if (i < 5) PicoAutoRgnOrder = rgn_orders[i+1]; - else PicoRegionOverride=1; + if (rgn_orders[i] == PicoIn.autoRgnOrder) break; + if (i < 5) PicoIn.autoRgnOrder = rgn_orders[i+1]; + else PicoIn.regionOverride=1; 
} - else PicoRegionOverride<<=1; - if (PicoRegionOverride > 8) PicoRegionOverride = 8; + else PicoIn.regionOverride<<=1; + if (PicoIn.regionOverride > 8) PicoIn.regionOverride = 8; } else { - if (!PicoRegionOverride) { + if (!PicoIn.regionOverride) { for (i = 0; i < 6; i++) - if (rgn_orders[i] == PicoAutoRgnOrder) break; - if (i > 0) PicoAutoRgnOrder = rgn_orders[i-1]; + if (rgn_orders[i] == PicoIn.autoRgnOrder) break; + if (i > 0) PicoIn.autoRgnOrder = rgn_orders[i-1]; } - else PicoRegionOverride>>=1; + else PicoIn.regionOverride>>=1; } } static void menu_options_save(void) { - if (PicoRegionOverride) { + if (PicoIn.regionOverride) { // force setting possibly changed.. - Pico.m.pal = (PicoRegionOverride == 2 || PicoRegionOverride == 8) ? 1 : 0; + Pico.m.pal = (PicoIn.regionOverride == 2 || PicoIn.regionOverride == 8) ? 1 : 0; } - if (!(PicoOpt & 0x20)) { + if (!(PicoIn.opt & 0x20)) { // unbind XYZ MODE, just in case unbind_action(0xf00); } @@ -1162,30 +1161,30 @@ static int menu_loop_options(void) switch (selected_id) { case MA_OPT_RENDERER: if (inp & PBTN_LEFT) { - if ((PicoOpt&0x10) || !(currentConfig.EmuOpt &0x80)) { - PicoOpt&= ~0x10; + if ((PicoIn.opt&0x10) || !(currentConfig.EmuOpt &0x80)) { + PicoIn.opt&= ~0x10; currentConfig.EmuOpt |= 0x80; } } else { - if (!(PicoOpt&0x10) || (currentConfig.EmuOpt &0x80)) { - PicoOpt|= 0x10; + if (!(PicoIn.opt&0x10) || (currentConfig.EmuOpt &0x80)) { + PicoIn.opt|= 0x10; currentConfig.EmuOpt &= ~0x80; } } break; case MA_OPT_SOUND_QUALITY: - if ((inp & PBTN_RIGHT) && PsndRate == 44100 && - !(PicoOpt&0x08)) + if ((inp & PBTN_RIGHT) && PicoIn.sndRate == 44100 && + !(PicoIn.opt&0x08)) { - PsndRate = 11025; - PicoOpt |= 8; - } else if ((inp & PBTN_LEFT) && PsndRate == 11025 && - (PicoOpt&0x08) && !(PicoAHW&1)) + PicoIn.sndRate = 11025; + PicoIn.opt |= 8; + } else if ((inp & PBTN_LEFT) && PicoIn.sndRate == 11025 && + (PicoIn.opt&0x08) && !(PicoIn.AHW&1)) { - PsndRate = 44100; - PicoOpt &= ~8; + PicoIn.sndRate = 44100; + 
PicoIn.opt &= ~8; } else - PsndRate = sndrate_prevnext(PsndRate, inp & PBTN_RIGHT); + PicoIn.sndRate = sndrate_prevnext(PicoIn.sndRate, inp & PBTN_RIGHT); break; case MA_OPT_REGION: region_prevnext(inp & PBTN_RIGHT); @@ -1456,14 +1455,14 @@ static void menu_loop_root(void) // warning: alignment void menu_darken_bg(void *dst, const void *src, int pixels, int darker) { - unsigned int *dest = dst; - const unsigned int *srce = src; + u32 *dest = dst; + const u32 *srce = src; pixels /= 2; if (darker) { while (pixels--) { - unsigned int p = *srce++; + u32 p = *srce++; *dest++ = ((p&0xf79ef79e)>>1) - ((p&0xc618c618)>>3); } } @@ -1471,7 +1470,7 @@ void menu_darken_bg(void *dst, const void *src, int pixels, int darker) { while (pixels--) { - unsigned int p = *srce++; + u32 p = *srce++; *dest++ = (p&0xf79ef79e)>>1; } } @@ -1578,9 +1577,9 @@ int menu_loop_tray(void) selfname = romsel_loop(curr_path); if (selfname) { int ret = -1; - cd_img_type cd_type; + cd_track_type cd_type; cd_type = emu_cdCheck(NULL, romFileName); - if (cd_type != CIT_NOT_CD) + if (cd_type >= 0 && cd_type != CT_UNKNOWN) ret = Insert_CD(romFileName, cd_type); if (ret != 0) { sprintf(menuErrorMsg, "Load failed, invalid CD image?"); diff --git a/platform/gp2x/940ctl.c b/platform/gp2x/940ctl.c index d3769570..b1730025 100644 --- a/platform/gp2x/940ctl.c +++ b/platform/gp2x/940ctl.c @@ -22,9 +22,9 @@ #include "../common/arm_utils.h" #include "../common/menu_pico.h" #include "../common/emu.h" -#include "../../pico/pico_int.h" -#include "../../pico/sound/ym2612.h" -#include "../../pico/sound/mix.h" +#include +#include +#include #include "code940/940shared.h" #include "plat.h" #include "940ctl.h" @@ -100,10 +100,10 @@ int YM2612Write_940(unsigned int a, unsigned int v, int scanline) UINT16 *writebuff = shared_ctl->writebuffsel ? 
shared_ctl->writebuff0 : shared_ctl->writebuff1; /* detect rapid ym updates */ - if (upd && !(writebuff_ptr & 0x80000000) && scanline < 224) + if (upd && !(writebuff_ptr & 0x80000000)) { - int mid = Pico.m.pal ? 68 : 93; - if (scanline > mid) { + int mid = (Pico.m.pal ? 313 : 262) / 2; + if (scanline >= mid) { //printf("%05i:%03i: rapid ym\n", Pico.m.frame_count, scanline); writebuff[writebuff_ptr++ & 0xffff] = 0xfffe; writebuff_ptr |= 0x80000000; @@ -282,8 +282,14 @@ void sharedmem940_finish(void) } -void YM2612Init_940(int baseclock, int rate) +void YM2612Init_940(int baseclock, int rate, int ssg) { + static int oldrate; + + // HACK + if (Pico.m.frame_count > 0 && !crashed_940 && rate == oldrate) + return; + printf("YM2612Init_940()\n"); printf("Mem usage: shared_data: %i, shared_ctl: %i\n", sizeof(*shared_data), sizeof(*shared_ctl)); @@ -333,7 +339,7 @@ void YM2612Init_940(int baseclock, int rate) memset(shared_ctl, 0, sizeof(*shared_ctl)); /* cause local ym2612 to init REGS */ - YM2612Init_(baseclock, rate); + YM2612Init_(baseclock, rate, ssg); internal_reset(); @@ -353,6 +359,8 @@ void YM2612Init_940(int baseclock, int rate) shared_ctl->baseclock = baseclock; shared_ctl->rate = rate; add_job_940(JOB940_INITALL); + + oldrate = rate; } @@ -383,7 +391,7 @@ int YM2612UpdateOne_940(int *buffer, int length, int stereo, int is_buf_empty) ym_active_chs = shared_ctl->ym_active_chs; // mix in ym buffer. 
is_buf_empty means nobody mixed there anything yet and it may contain trash - if (is_buf_empty && ym_active_chs) memcpy32(buffer, ym_buf, length<writebuffsel == 1) { @@ -394,9 +402,9 @@ int YM2612UpdateOne_940(int *buffer, int length, int stereo, int is_buf_empty) writebuff_ptr = 0; /* predict sample counter for next frame */ - if (PsndLen_exc_add) { - length = PsndLen; - if (PsndLen_exc_cnt + PsndLen_exc_add >= 0x10000) length++; + if (Pico.snd.len_e_add) { + length = Pico.snd.len; + if (Pico.snd.len_e_cnt + Pico.snd.len_e_add >= 0x10000) length++; } /* give 940 ym job */ @@ -416,9 +424,8 @@ int YM2612UpdateOne_940(int *buffer, int length, int stereo, int is_buf_empty) int mp3dec_decode(FILE *f, int *file_pos, int file_len) { - if (!(PicoOpt & POPT_EXT_FM)) { - //mp3_update_local(buffer, length, stereo); - return 0; + if (!(PicoIn.opt & POPT_EXT_FM)) { + return _mp3dec_decode(f, file_pos, file_len); } // check if playback was started, track not ended @@ -448,18 +455,17 @@ int mp3dec_decode(FILE *f, int *file_pos, int file_len) int mp3dec_start(FILE *f, int fpos_start) { - if (!(PicoOpt & POPT_EXT_FM)) { - //mp3_start_play_local(f, pos); - return -1; + if (!(PicoIn.opt & POPT_EXT_FM)) { + return _mp3dec_start(f, fpos_start); } if (loaded_mp3 != f) { - if (PicoMessage != NULL) + if (PicoIn.osdMessage != NULL) { fseek(f, 0, SEEK_END); if (ftell(f) > 2*1024*1024) - PicoMessage("Loading MP3..."); + PicoIn.osdMessage("Loading MP3..."); } fseek(f, 0, SEEK_SET); fread(mp3_mem, 1, MP3_SIZE_MAX, f); diff --git a/platform/gp2x/940ctl.h b/platform/gp2x/940ctl.h index 5b789dad..dba6cc70 100644 --- a/platform/gp2x/940ctl.h +++ b/platform/gp2x/940ctl.h @@ -1,7 +1,7 @@ void sharedmem940_init(void); void sharedmem940_finish(void); -void YM2612Init_940(int baseclock, int rate); +void YM2612Init_940(int baseclock, int rate, int ssg); void YM2612ResetChip_940(void); int YM2612UpdateOne_940(int *buffer, int length, int stereo, int is_buf_empty); diff --git a/platform/gp2x/Makefile 
b/platform/gp2x/Makefile index a3786c76..7756689e 100644 --- a/platform/gp2x/Makefile +++ b/platform/gp2x/Makefile @@ -1,9 +1,28 @@ -readme.txt: ../../tools/textfilter ../base_readme.txt +# release packaging makefile + +VER := $(shell head -n 1 ../common/version.h | \ + sed 's/.*"\(.*\)\.\(.*\)".*/\1\2/g') +BUILD := $(shell git describe HEAD | grep -- - | \ + sed -e 's/.*\-\(.*\)\-.*/\1/') +ifneq "$(BUILD)" "" +VER := $(VER)_$(BUILD) +endif + +include ../../config.mak + +all: rel + +../../tools/textfilter: ../../tools/textfilter.c + make -C ../../tools/ textfilter + +readme.txt: ../../tools/textfilter ../base_readme.txt ../../ChangeLog ../../tools/textfilter ../base_readme.txt $@ GP2X -VER ?= $(shell head -n 1 version.h | sed 's/.*"\(.*\)\.\(.*\)".*/\1\2/g') CODE940 = code940/pico940_v3.bin +PicoDrive: ../../picodrive + $(STRIP) $^ -o $@ + rel: PicoDrive PicoDrive.gpe $(CODE940) readme.txt ../game_def.cfg \ PicoDrive.png PicoDrive_s.png PicoDrive_t.png \ warm_2.4.25.o warm_2.4.26-open2x.o warm_2.6.24.ko \ @@ -13,8 +32,8 @@ rel: PicoDrive PicoDrive.gpe $(CODE940) readme.txt ../game_def.cfg \ cp PicoDrive.ini out/ cp skin/*.png out/PicoDrive/skin/ cp skin/*.txt out/PicoDrive/skin/ - mkdir out/bin_to_cso_mp3 - cp ../../tools/bin_to_cso_mp3/* out/bin_to_cso_mp3/ + #mkdir out/bin_to_cso_mp3 + #cp ../../tools/bin_to_cso_mp3/* out/bin_to_cso_mp3/ cd out && zip -9 -r ../../../PicoDrive_$(VER).zip * rm -rf out diff --git a/platform/gp2x/PicoDrive.gpe b/platform/gp2x/PicoDrive.gpe index 1c065185..59416d93 100644 --- a/platform/gp2x/PicoDrive.gpe +++ b/platform/gp2x/PicoDrive.gpe @@ -7,6 +7,8 @@ if ! 
[ -e /dev/accel ]; then export POLLUX_RAM_TIMINGS='ram_timings=2,9,4,1,1,1,1' export POLLUX_LCD_TIMINGS_NTSC='lcd_timings=397,1,37,277,341,0,17,337;clkdiv0=9' export POLLUX_LCD_TIMINGS_PAL='lcd_timings=428,1,37,277,341,0,17,337;clkdiv0=10' +else + export POLLUX_RAM_TIMINGS='ram_timings=3,9,4,1,1,1,1' fi ./PicoDrive "$@" diff --git a/platform/gp2x/PicoDrive.ini b/platform/gp2x/PicoDrive.ini new file mode 100644 index 00000000..06b816fa --- /dev/null +++ b/platform/gp2x/PicoDrive.ini @@ -0,0 +1,6 @@ +[info] +name="PicoDrive" +path="/PicoDrive/PicoDrive.gpe" +icon="/PicoDrive/PicoDrive_s.png" +title="/PicoDrive/PicoDrive_t.png" +group="GAMES" diff --git a/platform/gp2x/PicoDrive_s.png b/platform/gp2x/PicoDrive_s.png new file mode 100644 index 00000000..f5966165 Binary files /dev/null and b/platform/gp2x/PicoDrive_s.png differ diff --git a/platform/gp2x/PicoDrive_t.png b/platform/gp2x/PicoDrive_t.png new file mode 100644 index 00000000..79fc5290 Binary files /dev/null and b/platform/gp2x/PicoDrive_t.png differ diff --git a/platform/gp2x/code940/940.c b/platform/gp2x/code940/940.c index 760816eb..60be15c3 100644 --- a/platform/gp2x/code940/940.c +++ b/platform/gp2x/code940/940.c @@ -2,7 +2,7 @@ // (c) Copyright 2006-2007, Grazvydas "notaz" Ignotas #include "940shared.h" -#include "../../common/mp3.h" +#include static _940_data_t *shared_data = (_940_data_t *) 0x00100000; static _940_ctl_t *shared_ctl = (_940_ctl_t *) 0x00200000; @@ -19,7 +19,7 @@ void drain_wb(void); // is changed by other core just before we update it void set_if_not_changed(int *val, int oldval, int newval); -void _memcpy(void *dst, const void *src, int count); +extern void *memcpy(void *dest, const void *src, unsigned long n); // asm volatile ("mov r0, #0" ::: "r0"); // asm volatile ("mcr p15, 0, r0, c7, c6, 0" ::: "r0"); /* flush dcache */ @@ -153,6 +153,8 @@ void Main940(void) int job = 0; ym2612_940 = &shared_data->ym2612; +// extern unsigned __bss_start__, __bss_end__; +// memset(&__bss_start__, 
0, &__bss_end__ - &__bss_start__); for (;;) { @@ -165,8 +167,9 @@ void Main940(void) case JOB940_INITALL: /* ym2612 */ shared_ctl->writebuff0[0] = shared_ctl->writebuff1[0] = 0xffff; - YM2612Init_(shared_ctl->baseclock, shared_ctl->rate); + YM2612Init_(shared_ctl->baseclock, shared_ctl->rate, 0); /* Helix mp3 decoder */ + __malloc_init(); shared_data->mp3dec = MP3InitDecoder(); break; @@ -184,8 +187,8 @@ void Main940(void) break; case JOB940_PICOSTATESAVE2: - YM2612PicoStateSave2(0, 0); - _memcpy(shared_ctl->writebuff0, ym2612_940->REGS, 0x200); + YM2612PicoStateSave2(0, 0, 0); + memcpy(shared_ctl->writebuff0, ym2612_940->REGS, 0x200); break; case JOB940_PICOSTATELOAD2_PREP: @@ -193,8 +196,8 @@ void Main940(void) break; case JOB940_PICOSTATELOAD2: - _memcpy(ym2612_940->REGS, shared_ctl->writebuff0, 0x200); - YM2612PicoStateLoad2(0, 0); + memcpy(ym2612_940->REGS, shared_ctl->writebuff0, 0x200); + YM2612PicoStateLoad2(0, 0, 0); break; case JOB940_YM2612UPDATEONE: @@ -207,6 +210,7 @@ void Main940(void) case JOB940_MP3RESET: if (shared_data->mp3dec) MP3FreeDecoder(shared_data->mp3dec); + __malloc_init(); shared_data->mp3dec = MP3InitDecoder(); break; } @@ -215,4 +219,3 @@ void Main940(void) dcache_clean(); } } - diff --git a/platform/gp2x/code940/940shared.h b/platform/gp2x/code940/940shared.h index 6f12a32f..9897c1ca 100644 --- a/platform/gp2x/code940/940shared.h +++ b/platform/gp2x/code940/940shared.h @@ -1,4 +1,4 @@ -#include "../../../pico/sound/ym2612.h" +#include // max 16 jobs, lower num means higher prio enum _940_job_t { diff --git a/platform/gp2x/code940/Makefile b/platform/gp2x/code940/Makefile index e327d136..2535cfda 100644 --- a/platform/gp2x/code940/Makefile +++ b/platform/gp2x/code940/Makefile @@ -1,23 +1,20 @@ # you may or may not need to change this -#devkit_path = x:/stuff/dev/devkitgp2x/ -devkit_path ?= $(HOME)/opt/devkitGP2X/ -lgcc_path = $(devkit_path)lib/gcc/arm-linux/4.0.3/ -CROSS = arm-linux- -#CROSS = $(devkit_path)bin/arm-linux- +lgcc_path = 
$(HOME)/opt/open2x/gcc-4.1.1-glibc-2.3.6/lib/gcc/arm-open2x-linux/4.1.1/ +CROSS_COMPILE ?= arm-none-eabi- # settings #up = 1 -CFLAGS += -O2 -Wall -fomit-frame-pointer -fstrict-aliasing -ffast-math -CFLAGS += -I../.. -I. -D__GP2X__ -DARM -CFLAGS += -mcpu=arm940t -mtune=arm940t -LDFLAGS = -static -s -e code940 -Ttext 0x0 -L$(lgcc_path) -lgcc +CFLAGS += -O3 -Wall -mno-thumb-interwork -fstrict-aliasing -fno-stack-protector -fno-common -ffast-math +CFLAGS += -I../../common/helix/pub -I../../.. -I. -D__GP2X__ -DARM +CFLAGS += -mcpu=arm940t -mtune=arm940t -mabi=apcs-gnu -mfloat-abi=soft -mfpu=fpa +LDFLAGS = -static -e code940 -Ttext 0x0 -L$(lgcc_path) -lgcc -GCC = $(CROSS)gcc -STRIP = $(CROSS)strip -AS = $(CROSS)as -LD = $(CROSS)ld -OBJCOPY = $(CROSS)objcopy +GCC = $(CROSS_COMPILE)gcc +STRIP = $(CROSS_COMPILE)strip +AS = $(CROSS_COMPILE)as +LD = $(CROSS_COMPILE)ld +OBJCOPY = $(CROSS_COMPILE)objcopy vpath %.c = ../../common @@ -36,7 +33,9 @@ all: $(BIN) # stuff for 940 core # init, emu_control, emu -OBJS940 += 940init.o 940.o 940ym2612.o memcpy.o misc_arm.o mp3.o +OBJS940 += 940init.o 940.o 940ym2612.o misc_arm.o mp3_sync.o +# the asm memcpy code crashes job LOAD2 on 940. Possibly a globbered reg? 
+# OBJS940 += memcpy.o # the asm code seems to be faster when run on 920, but not on 940 for some reason # OBJS940 += ../../Pico/sound/ym2612_asm.o @@ -44,12 +43,13 @@ OBJS940 += 940init.o 940.o 940ym2612.o memcpy.o misc_arm.o mp3.o OBJS940 += uClibc/memset.o uClibc/s_floor.o uClibc/e_pow.o uClibc/e_sqrt.o uClibc/s_fabs.o OBJS940 += uClibc/s_scalbn.o uClibc/s_copysign.o uClibc/k_sin.o uClibc/k_cos.o uClibc/s_sin.o OBJS940 += uClibc/e_rem_pio2.o uClibc/k_rem_pio2.o uClibc/e_log.o uClibc/wrappers.o +LIBHELIX ?= ../../common/helix/$(notdir $(CROSS_COMPILE))helix_mp3.a $(BIN) : code940.elf @echo ">>>" $@ $(OBJCOPY) -O binary $< $@ -code940.elf : $(OBJS940) ../../common/helix/$(CROSS)helix-mp3.a +code940.elf : $(OBJS940) $(LIBHELIX) @echo ">>>" $@ $(LD) $^ $(LDFLAGS) -o $@ -Map code940.map @@ -57,15 +57,16 @@ code940.elf : $(OBJS940) ../../common/helix/$(CROSS)helix-mp3.a @echo ">>>" $@ $(GCC) $(CFLAGS) -Os -DEXTERNAL_YM2612 -c $< -o $@ -mix.o : ../../../pico/sound/mix.s - @echo ">>>" $@ - $(GCC) $(CFLAGS) -DEXTERNAL_YM2612 -c $< -o $@ misc_arm.o : ../../../pico/misc_arm.s @echo ">>>" $@ $(GCC) $(CFLAGS) -DEXTERNAL_YM2612 -c $< -o $@ -../../common/helix/helix_mp3.a: - @make -C ../../common/helix/ +mp3_sync.o: ../../common/mp3_sync.c + @echo ">>>" $@ + $(GCC) $(CFLAGS) -Os -DCODE940 -c $< -o $@ + +$(LIBHELIX): + @$(MAKE) -C ../../common/helix/ CROSS_COMPILE=$(CROSS_COMPILE) up: $(BIN) @@ -82,7 +83,7 @@ tidy: ## OBJSMP3T = mp3test.o ../gp2x.o ../asmutils.o ../usbjoy.o -mp3test.gpe : $(OBJSMP3T) ../helix/helix_mp3.a +mp3test.gpe : $(OBJSMP3T) $(LIBHELIX) $(GCC) -static -o $@ $^ $(STRIP) $@ @cp -v $@ /mnt/gp2x/mnt/sd diff --git a/platform/gp2x/code940/memcpy.s b/platform/gp2x/code940/memcpy.s index 282762fd..1350639a 100644 --- a/platform/gp2x/code940/memcpy.s +++ b/platform/gp2x/code940/memcpy.s @@ -114,14 +114,12 @@ subs r2, r2, #0x14 blt Lmemcpy_fl32 /* less than 32 bytes (12 from above) */ stmdb sp!, {r4, r7, r8, r9, r10} /* borrow r4 */ -/* blat 64 bytes at a time */ 
+/* blat 32 bytes at a time */ /* XXX for really big copies perhaps we should use more registers */ Lmemcpy_floop32: ldmia r1!, {r3, r4, r7, r8, r9, r10, r12, lr} stmia r0!, {r3, r4, r7, r8, r9, r10, r12, lr} -ldmia r1!, {r3, r4, r7, r8, r9, r10, r12, lr} -stmia r0!, {r3, r4, r7, r8, r9, r10, r12, lr} -subs r2, r2, #0x40 +subs r2, r2, #0x20 bge Lmemcpy_floop32 cmn r2, #0x10 @@ -314,14 +312,12 @@ stmdb sp!, {r4, r7, r8, r9, r10, lr} subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */ blt Lmemcpy_bl32 -/* blat 64 bytes at a time */ +/* blat 32 bytes at a time */ /* XXX for really big copies perhaps we should use more registers */ Lmemcpy_bloop32: ldmdb r1!, {r3, r4, r7, r8, r9, r10, r12, lr} stmdb r0!, {r3, r4, r7, r8, r9, r10, r12, lr} -ldmdb r1!, {r3, r4, r7, r8, r9, r10, r12, lr} -stmdb r0!, {r3, r4, r7, r8, r9, r10, r12, lr} -subs r2, r2, #0x40 +subs r2, r2, #0x20 bge Lmemcpy_bloop32 Lmemcpy_bl32: diff --git a/platform/gp2x/code940/mp3test.c b/platform/gp2x/code940/mp3test.c index 9072d858..6ffd09c8 100644 --- a/platform/gp2x/code940/mp3test.c +++ b/platform/gp2x/code940/mp3test.c @@ -13,7 +13,7 @@ //#include "emu.h" //#include "menu.h" #include "../asmutils.h" -#include "../helix/pub/mp3dec.h" +#include /* we will need some gp2x internals here */ extern volatile unsigned short *gp2x_memregs; /* from minimal library rlyeh */ diff --git a/platform/gp2x/code940/pico940_v3.bin b/platform/gp2x/code940/pico940_v3.bin new file mode 100755 index 00000000..d77ac4bc Binary files /dev/null and b/platform/gp2x/code940/pico940_v3.bin differ diff --git a/platform/gp2x/code940/uClibc/memset.s b/platform/gp2x/code940/uClibc/memset.s index 0923014c..80cdcb58 100644 --- a/platform/gp2x/code940/uClibc/memset.s +++ b/platform/gp2x/code940/uClibc/memset.s @@ -22,7 +22,7 @@ .text .global memset .type memset,%function - .align 4 + .align 2 memset: mov a4, a1 diff --git a/platform/gp2x/code940/uClibc/wrappers.c b/platform/gp2x/code940/uClibc/wrappers.c index cc4e269e..ce95a48c 
100644 --- a/platform/gp2x/code940/uClibc/wrappers.c +++ b/platform/gp2x/code940/uClibc/wrappers.c @@ -4,9 +4,17 @@ double pow(double x, double y) { return __ieee754_pow(x, y); } +double __pow_finite(double x, double y) +{ + return __ieee754_pow(x, y); +} double log(double x) { return __ieee754_log(x); } +double __log_finite(double x) +{ + return __ieee754_log(x); +} diff --git a/platform/gp2x/emu.c b/platform/gp2x/emu.c index 7db89c29..8a1fda83 100644 --- a/platform/gp2x/emu.c +++ b/platform/gp2x/emu.c @@ -1,5 +1,6 @@ /* * (c) Copyright 2006-2010 notaz, All rights reserved. + * (c) Copyright 2019-2024 irixxxx * * For performance reasons 3 renderers are exported for both MD and 32x modes: * - 16bpp line renderer @@ -7,8 +8,8 @@ * - 8bpp tile renderer * In 32x mode: * - 32x layer is overlayed on top of 16bpp one - * - line internal one done on PicoDraw2FB, then mixed with 32x - * - tile internal one done on PicoDraw2FB, then mixed with 32x + * - line internal one done on .Draw2FB, then mixed with 32x + * - tile internal one done on .Draw2FB, then mixed with 32x */ #include @@ -30,7 +31,7 @@ #include #include #include -#include +#include #ifdef BENCHMARK #define OSD_FPS_X 220 @@ -46,6 +47,10 @@ const char *renderer_names[] = { "16bit accurate", " 8bit accurate", " 8bit fast const char *renderer_names32x[] = { "accurate", "faster", "fastest", NULL }; enum renderer_types { RT_16BIT, RT_8BIT_ACC, RT_8BIT_FAST, RT_COUNT }; +static int is_1stblanked; +static int firstline, linecount; +static int firstcol, colcount; + static int (*emu_scan_begin)(unsigned int num) = NULL; static int (*emu_scan_end)(unsigned int num) = NULL; @@ -55,7 +60,7 @@ void pemu_prep_defconfig(void) gp2x_soc_t soc; defaultConfig.CPUclock = default_cpu_clock; - defaultConfig.renderer32x = RT_8BIT_FAST; + defaultConfig.renderer32x = RT_8BIT_ACC; defaultConfig.analog_deadzone = 50; soc = soc_detect(); @@ -70,7 +75,7 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { if (gp2x_dev_id != 
GP2X_DEV_GP2X) - PicoOpt &= ~POPT_EXT_FM; + PicoIn.opt &= ~POPT_EXT_FM; if (gp2x_dev_id != GP2X_DEV_WIZ) currentConfig.EmuOpt &= ~EOPT_WIZ_TEAR_FIX; @@ -83,7 +88,9 @@ void pemu_validate_config(void) static int get_renderer(void) { - if (PicoAHW & PAHW_32X) + if (doing_bg_frame) + return RT_16BIT; + if (PicoIn.AHW & PAHW_32X) return currentConfig.renderer32x; else return currentConfig.renderer; @@ -92,16 +99,12 @@ static int get_renderer(void) static void change_renderer(int diff) { int *r; - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) r = ¤tConfig.renderer32x; else r = ¤tConfig.renderer; *r += diff; - // 8bpp fast is not there (yet?) - if ((PicoAHW & PAHW_SMS) && *r == RT_8BIT_FAST) - (*r)++; - if (*r >= RT_COUNT) *r = 0; else if (*r < 0) @@ -109,7 +112,7 @@ static void change_renderer(int diff) } #define is_16bit_mode() \ - (get_renderer() == RT_16BIT || (PicoAHW & PAHW_32X)) + (currentConfig.renderer == RT_16BIT || (PicoIn.AHW & PAHW_32X) || doing_bg_frame) static void (*osd_text)(int x, int y, const char *text); @@ -128,21 +131,6 @@ static void osd_text8(int x, int y, const char *text) emu_text_out8(x, y, text); } -static void osd_text16(int x, int y, const char *text) -{ - int len = strlen(text)*8; - int *p, i, h, offs; - - len = (len+1) >> 1; - for (h = 0; h < 8; h++) { - offs = (x + g_screen_width * (y+h)) & ~1; - p = (int *) ((short *)g_screen_ptr + offs); - for (i = len; i; i--, p++) - *p = (*p >> 2) & 0x39e7; - } - emu_text_out16(x, y, text); -} - static void osd_text8_rot(int x, int y, const char *text) { int len = strlen(text) * 8; @@ -207,29 +195,58 @@ static void draw_cd_leds(void) static void draw_pico_ptr(void) { - unsigned short *p = (unsigned short *)g_screen_ptr; - int x, y, pitch = 320; + int up = (PicoPicohw.pen_pos[0]|PicoPicohw.pen_pos[1]) & 0x8000; + int x, y, pitch = 320, offs; + // storyware pages are actually squished, 2:1 + int h = (pico_inp_mode == 1 ? 
160 : linecount); + if (h < 224) y++; - // only if pen enabled and for 16bit modes - if (pico_inp_mode == 0 || currentConfig.EmuOpt != RT_16BIT) - return; - - x = pico_pen_x + PICO_PEN_ADJUST_X; - y = pico_pen_y + PICO_PEN_ADJUST_Y; - if (!(Pico.video.reg[12]&1) && !(PicoOpt & POPT_DIS_32C_BORDER)) - x += 32; + x = ((pico_pen_x * colcount * ((1ULL<<32)/320 + 1)) >> 32) + firstcol; + y = ((pico_pen_y * h * ((1ULL<<32)/224 + 1)) >> 32) + firstline; if (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) { pitch = 240; - p += (319 - x) * pitch + y; + offs = (319 - x) * pitch + y; } else - p += x + y * pitch; + offs = x + y * pitch; - p[0] ^= 0xffff; - p[pitch-1] ^= 0xffff; - p[pitch] ^= 0xffff; - p[pitch+1] ^= 0xffff; - p[pitch*2] ^= 0xffff; + if (is_16bit_mode()) { + unsigned short *p = (unsigned short *)g_screen_ptr + offs; + int o = (up ? 0x0000 : 0xffff), _ = (up ? 0xffff : 0x0000); + + p[-pitch-1] ^= o; p[-pitch] ^= _; p[-pitch+1] ^= _; p[-pitch+2] ^= o; + p[-1] ^= _; p[0] ^= o; p[1] ^= o; p[2] ^= _; + p[pitch-1] ^= _; p[pitch] ^= o; p[pitch+1] ^= o; p[pitch+2] ^= _; + p[2*pitch-1]^= o; p[2*pitch]^= _; p[2*pitch+1]^= _; p[2*pitch+2]^= o; + } else { + unsigned char *p = (unsigned char *)g_screen_ptr + offs; + int o = (up ? 0xe0 : 0xf0), _ = (up ? 0xf0 : 0xe0); + + p[-pitch-1] = o; p[-pitch] = _; p[-pitch+1] = _; p[-pitch+2] = o; + p[-1] = _; p[0] = o; p[1] = o; p[2] = _; + p[pitch-1] = _; p[pitch] = o; p[pitch+1] = o; p[pitch+2] = _; + p[2*pitch-1]= o; p[2*pitch]= _; p[2*pitch+1]= _; p[2*pitch+2]= o; + } +} + +static void clear_1st_column(int firstcol, int firstline, int linecount) +{ + int size = is_16bit_mode() ? 2 : 1; + int black = is_16bit_mode() ? 
0 : 0xe0; + int i; + + // SMS 1st column blanked, replace with black + if ((currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) && !doing_bg_frame) { + int pitch = 240*size; + char *p = (char *)g_screen_ptr + (319-(firstcol-8))*pitch; + for (i = 0; i < 8; i++, p -= pitch) + memset(p+(firstline)*size, black, linecount*size); + } else { + int pitch = 320*size; + char *p = (char *)g_screen_ptr + (firstline)*pitch; + for (i = 0; i < linecount; i++, p += pitch) + memset(p+(firstcol-8)*size, black, 8*size); + } } /* rot thing for Wiz */ @@ -237,7 +254,7 @@ static unsigned char __attribute__((aligned(4))) rot_buff[320*4*2]; static int EmuScanBegin16_rot(unsigned int num) { - DrawLineDest = rot_buff + (num & 3) * 320 * 2; + Pico.est.DrawLineDest = rot_buff + (num & 3) * 320 * 2; return 0; } @@ -246,13 +263,13 @@ static int EmuScanEnd16_rot(unsigned int num) if ((num & 3) != 3) return 0; rotated_blit16(g_screen_ptr, rot_buff, num + 1, - !(Pico.video.reg[12] & 1) && !(PicoOpt & POPT_EN_SOFTSCALE)); + !(Pico.video.reg[12] & 1) && !(PicoIn.opt & POPT_EN_SOFTSCALE)); return 0; } static int EmuScanBegin8_rot(unsigned int num) { - DrawLineDest = rot_buff + (num & 3) * 320; + Pico.est.DrawLineDest = rot_buff + (num & 3) * 320; return 0; } @@ -261,41 +278,44 @@ static int EmuScanEnd8_rot(unsigned int num) if ((num & 3) != 3) return 0; rotated_blit8(g_screen_ptr, rot_buff, num + 1, - !(Pico.video.reg[12] & 1)); + !(Pico.video.reg[12] & 1) && !(PicoIn.opt & POPT_EN_SOFTSCALE)); return 0; } /* line doublers */ static unsigned int ld_counter; -static int ld_left, ld_lines; +static int ld_left, ld_lines; // numbers in Q1 format static int EmuScanBegin16_ld(unsigned int num) { - if ((signed int)(ld_counter - num) > 100) - ld_counter = 0; + if ((signed int)(ld_counter - num) > 100) { + // vsync, offset so that the upscaled image is centered + ld_counter = 120 - (120-num) * (ld_lines+2)/ld_lines; + ld_left = ld_lines; + } if (emu_scan_begin) return emu_scan_begin(ld_counter); else - DrawLineDest = 
(char *)g_screen_ptr + 320 * ld_counter * gp2x_current_bpp / 8; + Pico.est.DrawLineDest = (char *)g_screen_ptr + 320 * ld_counter * gp2x_current_bpp / 8; return 0; } static int EmuScanEnd16_ld(unsigned int num) { - void *oldline = DrawLineDest; + void *oldline = Pico.est.DrawLineDest; if (emu_scan_end) emu_scan_end(ld_counter); ld_counter++; - ld_left--; + ld_left -= 2; if (ld_left <= 0) { - ld_left = ld_lines; + ld_left += ld_lines; EmuScanBegin16_ld(num); - memcpy32(DrawLineDest, oldline, 320 * gp2x_current_bpp / 8 / 4); + memcpy(Pico.est.DrawLineDest, oldline, 320 * gp2x_current_bpp / 8); if (emu_scan_end) emu_scan_end(ld_counter); @@ -306,108 +326,149 @@ static int EmuScanEnd16_ld(unsigned int num) } static int localPal[0x100]; -static void (*vidcpyM2)(void *dest, void *src, int m32col, int with_32c_border); +static int localPalSize; + +static void (*vidcpy8bit)(void *dest, void *src, int x_y, int w_h); static int (*make_local_pal)(int fast_mode); static int make_local_pal_md(int fast_mode) { - int pallen = 0xc0; + int pallen = 0x100; - bgr444_to_rgb32(localPal, Pico.cram); - if (fast_mode) - return 0x40; - - if (Pico.video.reg[0xC] & 8) { // shadow/hilight mode - bgr444_to_rgb32_sh(localPal, Pico.cram); - localPal[0xc0] = 0x0000c000; - localPal[0xd0] = 0x00c00000; - localPal[0xe0] = 0x00000000; // reserved pixels for OSD - localPal[0xf0] = 0x00ffffff; - pallen = 0x100; + if (fast_mode) { + bgr444_to_rgb32(localPal, PicoMem.cram, 64); + pallen = 0x40; + Pico.m.dirtyPal = 0; } - else if (rendstatus & PDRAW_SONIC_MODE) { // mid-frame palette changes - bgr444_to_rgb32(localPal+0x40, HighPal); - bgr444_to_rgb32(localPal+0x80, HighPal+0x40); + else if (Pico.est.rendstatus & PDRAW_SONIC_MODE) { // mid-frame palette changes + switch (Pico.est.SonicPalCount) { + case 3: bgr444_to_rgb32(localPal+0xc0, Pico.est.SonicPal+0xc0, 64); + case 2: bgr444_to_rgb32(localPal+0x80, Pico.est.SonicPal+0x80, 64); + case 1: bgr444_to_rgb32(localPal+0x40, Pico.est.SonicPal+0x40, 64); + 
default:bgr444_to_rgb32(localPal, Pico.est.SonicPal, 64); + } + pallen = (Pico.est.SonicPalCount+1)*0x40; } - else - memcpy32(localPal+0x80, localPal, 0x40); // for spr prio mess + else if (Pico.video.reg[0xC] & 8) { // shadow/hilight mode + bgr444_to_rgb32(localPal, Pico.est.SonicPal, 64); + bgr444_to_rgb32_sh(localPal, Pico.est.SonicPal); + memcpy(localPal+0xc0, localPal, 0x40*4); // for spr prio mess + } + else { + bgr444_to_rgb32(localPal, Pico.est.SonicPal, 64); + memcpy(localPal+0x40, localPal, 0x40*4); // for spr prio mess + memcpy(localPal+0x80, localPal, 0x80*4); // for spr prio mess + } + localPal[0xc0] = 0x0000c000; + localPal[0xd0] = 0x00c00000; + localPal[0xe0] = 0x00000000; // reserved pixels for OSD + localPal[0xf0] = 0x00ffffff; + if (Pico.m.dirtyPal == 2) + Pico.m.dirtyPal = 0; return pallen; } static int make_local_pal_sms(int fast_mode) { - unsigned short *spal = Pico.cram; - unsigned int *dpal = (void *)localPal; - unsigned int i, t; - - for (i = 0x40; i > 0; i--) { - t = *spal++; - t = ((t & 0x0003) << 22) | ((t & 0x000c) << 12) | ((t & 0x0030) << 2); - t |= t >> 2; - t |= t >> 4; - *dpal++ = t; + static u16 tmspal[32] = { + // SMS palette for TMS modes + 0x0000, 0x0000, 0x00a0, 0x00f0, 0x0500, 0x0f00, 0x0005, 0x0ff0, + 0x000a, 0x000f, 0x0055, 0x00ff, 0x0050, 0x0f0f, 0x0555, 0x0fff, + // TMS palette + 0x0000, 0x0000, 0x04c2, 0x07d6, 0x0e55, 0x0f77, 0x055c, 0x0ee4, + 0x055f, 0x077f, 0x05bc, 0x08ce, 0x03a2, 0x0b5c, 0x0ccc, 0x0fff, + }; + int i; + + if (!(Pico.video.reg[0] & 0x4)) { + for (i = Pico.est.SonicPalCount; i >= 0; i--) { + int sg = !!(PicoIn.AHW & (PAHW_SG|PAHW_SC)); + bgr444_to_rgb32(localPal+i*0x40, tmspal+sg*0x10, 32); + memcpy(localPal+i*0x40+0x20, localPal+i*0x40, 0x20*4); + } + } else if (fast_mode) { + for (i = 0;i >= 0; i--) { + bgr444_to_rgb32(localPal+i*0x40, PicoMem.cram+i*0x40, 32); + memcpy(localPal+i*0x40+0x20, localPal+i*0x40, 0x20*4); + } + } else { + for (i = Pico.est.SonicPalCount; i >= 0; i--) { + 
bgr444_to_rgb32(localPal+i*0x40, Pico.est.SonicPal+i*0x40, 32); + memcpy(localPal+i*0x40+0x20, localPal+i*0x40, 0x20*4); + } } - - return 0x40; + if (Pico.m.dirtyPal == 2) + Pico.m.dirtyPal = 0; + return (Pico.est.SonicPalCount+1)*0x40; } void pemu_finalize_frame(const char *fps, const char *notice) { int emu_opt = currentConfig.EmuOpt; - int ret; + int direct_rendered = 1; - if (PicoAHW & PAHW_32X) - ; // nothing to do + if (is_16bit_mode()) + localPalSize = 0; // nothing to do else if (get_renderer() == RT_8BIT_FAST) { // 8bit fast renderer - if (Pico.m.dirtyPal) { - Pico.m.dirtyPal = 0; - ret = make_local_pal(1); - // feed new palette to our device - gp2x_video_setpalette(localPal, ret); - } + if (Pico.m.dirtyPal) + localPalSize = make_local_pal(1); // a hack for VR - if (PicoAHW & PAHW_SVP) - memset32((int *)(PicoDraw2FB+328*8+328*223), 0xe0e0e0e0, 328); + if (PicoIn.AHW & PAHW_SVP) + memset32((int *)(Pico.est.Draw2FB+328*8+328*223), 0xe0e0e0e0, 328/4); // do actual copy - vidcpyM2(g_screen_ptr, PicoDraw2FB+328*8, - !(Pico.video.reg[12] & 1), !(PicoOpt & POPT_DIS_32C_BORDER)); + vidcpy8bit(g_screen_ptr, Pico.est.Draw2FB, + (firstcol << 16) | firstline, (colcount << 16) | linecount); + direct_rendered = 0; } else if (get_renderer() == RT_8BIT_ACC) { // 8bit accurate renderer if (Pico.m.dirtyPal) - { - Pico.m.dirtyPal = 0; - ret = make_local_pal(0); - gp2x_video_setpalette(localPal, ret); - } + localPalSize = make_local_pal(0); } + // blank 1st column, only needed in modes directly rendering to screen + if (is_1stblanked && direct_rendered) + clear_1st_column(firstcol, firstline, linecount); + if (notice) osd_text(4, osd_y, notice); if (emu_opt & EOPT_SHOW_FPS) osd_text(osd_fps_x, osd_y, fps); - if ((PicoAHW & PAHW_MCD) && (emu_opt & EOPT_EN_CD_LEDS)) + if ((PicoIn.AHW & PAHW_MCD) && (emu_opt & EOPT_EN_CD_LEDS)) draw_cd_leds(); - if (PicoAHW & PAHW_PICO) - draw_pico_ptr(); + if (PicoIn.AHW & PAHW_PICO) { + int h = linecount, w = colcount; + u16 *pd = g_screen_ptr 
+ firstline*g_screen_ppitch + firstcol; + + if (pico_inp_mode && is_16bit_mode()) + emu_pico_overlay(pd, w, h, g_screen_ppitch); + if (pico_inp_mode /*== 2 || overlay*/) + draw_pico_ptr(); + } } void plat_video_flip(void) { int stride = g_screen_width; gp2x_video_flip(); + // switching the palette takes immediate effect, whilst flipping only + // takes effect with the next vsync; unavoidable flicker may occur! + if (localPalSize) + gp2x_video_setpalette(localPal, localPalSize); if (is_16bit_mode()) stride *= 2; - PicoDrawSetOutBuf(g_screen_ptr, stride); + // the fast renderer has overlap areas and can't directly render to + // screen buffers. Its output is copied to screen in finalize_frame + if (get_renderer() != RT_8BIT_FAST || (PicoIn.AHW & PAHW_32X)) + PicoDrawSetOutBuf(g_screen_ptr, stride); } /* XXX */ -#ifdef __GP2X__ unsigned int plat_get_ticks_ms(void) { return gp2x_get_ticks_ms(); @@ -417,7 +478,6 @@ unsigned int plat_get_ticks_us(void) { return gp2x_get_ticks_us(); } -#endif void plat_wait_till_us(unsigned int us_to) { @@ -440,30 +500,31 @@ void plat_video_wait_vsync(void) void plat_status_msg_clear(void) { - int is_8bit = !is_16bit_mode(); - if (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) { - /* ugh.. */ - int i, u, *p; - if (is_8bit) { - for (i = 0; i < 4; i++) { + int i, is_8bit = !is_16bit_mode(); + + for (i = 0; i < 4; i++) { + if (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) { + /* ugh.. 
*/ + int u, *p; + if (is_8bit) { p = (int *)gp2x_screens[i] + (240-8) / 4; for (u = 320; u > 0; u--, p += 240/4) p[0] = p[1] = 0xe0e0e0e0; - } - } else { - for (i = 0; i < 4; i++) { + } else { p = (int *)gp2x_screens[i] + (240-8)*2 / 4; for (u = 320; u > 0; u--, p += 240*2/4) p[0] = p[1] = p[2] = p[3] = 0; } + } else { + if (is_8bit) { + char *d = (char *)gp2x_screens[i] + 320 * (240-8); + memset32((int *)d, 0xe0e0e0e0, 320 * 8 / 4); + } else { + char *d = (char *)gp2x_screens[i] + 320*2 * (240-8); + memset32((int *)d, 0, 2*320 * 8 / 4); + } } - return; } - - if (is_8bit) - gp2x_memset_all_buffers(320*232, 0xe0, 320*8); - else - gp2x_memset_all_buffers(320*232*2, 0, 320*8*2); } void plat_status_msg_busy_next(const char *msg) @@ -480,7 +541,6 @@ void plat_status_msg_busy_next(const char *msg) void plat_status_msg_busy_first(const char *msg) { - gp2x_memcpy_all_buffers(g_screen_ptr, 0, 320*240*2); plat_status_msg_busy_next(msg); } @@ -489,9 +549,13 @@ static void vid_reset_mode(void) int gp2x_mode = 16; int renderer = get_renderer(); - PicoOpt &= ~POPT_ALT_RENDERER; - emu_scan_begin = NULL; - emu_scan_end = NULL; + PicoIn.opt &= ~(POPT_ALT_RENDERER|POPT_DIS_32C_BORDER|POPT_EN_SOFTSCALE); + if (currentConfig.scaling == EOPT_SCALE_SW) { + PicoIn.opt |= POPT_EN_SOFTSCALE; + PicoIn.filter = EOPT_FILTER_BILINEAR2; + } else if (currentConfig.scaling == EOPT_SCALE_HW) + // hw scaling, render without any padding + PicoIn.opt |= POPT_DIS_32C_BORDER; switch (renderer) { case RT_16BIT: @@ -504,9 +568,9 @@ static void vid_reset_mode(void) gp2x_mode = 8; break; case RT_8BIT_FAST: - PicoOpt |= POPT_ALT_RENDERER; + PicoIn.opt |= POPT_ALT_RENDERER; PicoDrawSetOutFormat(PDF_NONE, 0); - vidcpyM2 = vidcpy_m2; + vidcpy8bit = vidcpy_8bit; gp2x_mode = 8; break; default: @@ -514,35 +578,37 @@ static void vid_reset_mode(void) break; } - if (PicoAHW & PAHW_32X) { + if (PicoIn.AHW & PAHW_32X) { // Wiz 16bit is an exception, uses line rendering due to rotation mess if (renderer == RT_16BIT && 
(currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX)) { PicoDrawSetOutFormat(PDF_RGB555, 1); } - else { - PicoDrawSetOutFormat(PDF_NONE, 0); - } PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); gp2x_mode = 16; } + emu_scan_begin = NULL; + emu_scan_end = NULL; + if (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) { - if ((PicoAHW & PAHW_32X) || renderer == RT_16BIT) { + if ((PicoIn.AHW & PAHW_32X) || renderer == RT_16BIT) { emu_scan_begin = EmuScanBegin16_rot; emu_scan_end = EmuScanEnd16_rot; + memset(rot_buff, 0, 320*4*2); } else if (renderer == RT_8BIT_ACC) { emu_scan_begin = EmuScanBegin8_rot; emu_scan_end = EmuScanEnd8_rot; + memset(rot_buff, 0xe0, 320*4); } else if (renderer == RT_8BIT_FAST) - vidcpyM2 = vidcpy_m2_rot; + vidcpy8bit = vidcpy_8bit_rot; } PicoDrawSetCallbacks(emu_scan_begin, emu_scan_end); if (is_16bit_mode()) - osd_text = (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) ? osd_text16_rot : osd_text16; + osd_text = (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) ? osd_text16_rot : emu_osd_text16; else osd_text = (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) ? osd_text8_rot : osd_text8; @@ -554,31 +620,50 @@ static void vid_reset_mode(void) localPal[0xe0] = 0x00000000; // reserved pixels for OSD localPal[0xf0] = 0x00ffffff; gp2x_video_setpalette(localPal, 0x100); - gp2x_memset_all_buffers(0, 0xe0, 320*240); } - else - gp2x_memset_all_buffers(0, 0, 320*240*2); if (currentConfig.EmuOpt & EOPT_WIZ_TEAR_FIX) gp2x_mode = -gp2x_mode; - gp2x_video_changemode(gp2x_mode); + gp2x_video_changemode(gp2x_mode, Pico.m.pal); + + // clear whole screen in all buffers + if (!is_16bit_mode()) + gp2x_memset_all_buffers(0, 0xe0, 320*240); + else + gp2x_memset_all_buffers(0, 0, 320*240*2); Pico.m.dirtyPal = 1; - PicoOpt &= ~POPT_EN_SOFTSCALE; - if (currentConfig.scaling == EOPT_SCALE_SW) - PicoOpt |= POPT_EN_SOFTSCALE; - // palette converters for 8bit modes - make_local_pal = (PicoAHW & PAHW_SMS) ? make_local_pal_sms : make_local_pal_md; + make_local_pal = (PicoIn.AHW & PAHW_SMS) ? 
make_local_pal_sms : make_local_pal_md; } -void emu_video_mode_change(int start_line, int line_count, int is_32cols) +void emu_video_mode_change(int start_line, int line_count, int start_col, int col_count) { int scalex = 320, scaley = 240; int ln_offs = 0; + if (currentConfig.vscaling != EOPT_SCALE_NONE && + (is_16bit_mode() || get_renderer() != RT_8BIT_FAST)) { + /* NTSC always has 224 visible lines, anything smaller has bars */ + if (line_count < 224 && line_count > 144) { + start_line -= (224-line_count) /2; + line_count = 224; + } + + /* line doubling for swscaling, also needed for bg frames */ + if (currentConfig.vscaling == EOPT_SCALE_SW && line_count < 240) { + ld_lines = ld_left = 2*line_count / (240 - line_count); + PicoDrawSetCallbacks(EmuScanBegin16_ld,EmuScanEnd16_ld); + } + } + + /* blanking for SMS with 1st tile blanked */ + is_1stblanked = (col_count == 248); + firstline = start_line; linecount = line_count; + firstcol = start_col; colcount = col_count; + if (doing_bg_frame) return; @@ -586,11 +671,9 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) osd_y = 232; /* set up hwscaling here */ - PicoOpt &= ~POPT_DIS_32C_BORDER; - if (is_32cols && currentConfig.scaling == EOPT_SCALE_HW) { - scalex = 256; - PicoOpt |= POPT_DIS_32C_BORDER; - osd_fps_x = OSD_FPS_X - 64; + if (col_count < 320 && currentConfig.scaling == EOPT_SCALE_HW) { + scalex = col_count; + osd_fps_x = col_count - (320-OSD_FPS_X); } if (currentConfig.vscaling == EOPT_SCALE_HW) { @@ -601,12 +684,6 @@ void emu_video_mode_change(int start_line, int line_count, int is_32cols) gp2x_video_RGB_setscaling(ln_offs, scalex, scaley); - /* line doubling */ - if (currentConfig.vscaling == EOPT_SCALE_SW && line_count < 240) { - ld_lines = ld_left = line_count / (240 - line_count); - PicoDrawSetCallbacks(EmuScanBegin16_ld, EmuScanEnd16_ld); - } - // clear whole screen in all buffers if (!is_16bit_mode()) gp2x_memset_all_buffers(0, 0xe0, 320*240); @@ -624,7 +701,7 @@ void 
plat_video_toggle_renderer(int change, int is_menu_call) vid_reset_mode(); rendstatus_old = -1; - if (PicoAHW & PAHW_32X) + if (PicoIn.AHW & PAHW_32X) emu_status_msg(renderer_names32x[get_renderer()]); else emu_status_msg(renderer_names[get_renderer()]); @@ -643,7 +720,7 @@ static void RunEventsPico(unsigned int events) if (ret > 35000) { if (pdown_frames++ > 5) - PicoPad[0] |= 0x20; + PicoIn.pad[0] |= 0x20; pico_pen_x = px; pico_pen_y = py; @@ -671,7 +748,7 @@ void plat_update_volume(int has_changed, int is_up) gp2x_soc_t soc; soc = soc_detect(); - if ((PicoOpt & POPT_EN_STEREO) && soc == SOCID_MMSP2) + if ((PicoIn.opt & POPT_EN_STEREO) && soc == SOCID_MMSP2) need_low_volume = 1; if (has_changed) @@ -692,69 +769,42 @@ void plat_update_volume(int has_changed, int is_up) /* set the right mixer func */ if (vol >= 5) - PsndMix_32_to_16l = mix_32_to_16l_stereo; + PsndMix_32_to_16 = mix_32_to_16_stereo; else { - mix_32_to_16l_level = 5 - vol; - PsndMix_32_to_16l = mix_32_to_16l_stereo_lvl; + mix_32_to_16_level = 5 - vol; + PsndMix_32_to_16 = mix_32_to_16_stereo_lvl; } } void pemu_sound_start(void) { + gp2x_soc_t soc; + emu_sound_start(); - plat_target_step_volume(¤tConfig.volume, 0); - -#if 0 - static int PsndRate_old = 0, PicoOpt_old = 0, pal_old = 0; - - PsndOut = NULL; - - // prepare sound stuff if (currentConfig.EmuOpt & EOPT_EN_SOUND) { - int is_stereo = (PicoOpt & POPT_EN_STEREO) ? 
1 : 0; - int snd_rate_oss = PsndRate; - gp2x_soc_t soc; - - memset(sndBuffer, 0, sizeof(sndBuffer)); - PsndOut = sndBuffer; - PicoWriteSound = oss_write_nonblocking; - plat_update_volume(0, 0); - - printf("starting audio: %i len: %i stereo: %i, pal: %i\n", - PsndRate, PsndLen, is_stereo, Pico.m.pal); - sndout_oss_start(snd_rate_oss, is_stereo, 1); - sndout_oss_setvol(currentConfig.volume, currentConfig.volume); - soc = soc_detect(); - if (soc == SOCID_POLLUX) - PsndRate = pollux_get_real_snd_rate(PsndRate); - - #define SOUND_RERATE_FLAGS (POPT_EN_FM|POPT_EN_PSG|POPT_EN_STEREO|POPT_EXT_FM|POPT_EN_MCD_CDDA) - if (PsndRate != PsndRate_old || Pico.m.pal != pal_old || ((PicoOpt & POPT_EXT_FM) && crashed_940) || - ((PicoOpt ^ PicoOpt_old) & SOUND_RERATE_FLAGS)) { + if (soc == SOCID_POLLUX) { + PicoIn.sndRate = pollux_get_real_snd_rate(PicoIn.sndRate); PsndRerate(Pico.m.frame_count ? 1 : 0); } - PsndRate_old = PsndRate; - PicoOpt_old = PicoOpt; - pal_old = Pico.m.pal; + plat_target_step_volume(¤tConfig.volume, 0); } -#endif } -static const int sound_rates[] = { 44100, 32000, 22050, 16000, 11025, 8000 }; +static const int sound_rates[] = { 52000, 44100, 32000, 22050, 16000, 11025, 8000 }; void pemu_sound_stop(void) { int i; /* get back from Pollux pain */ - PsndRate += 1000; + PicoIn.sndRate += 1000; for (i = 0; i < ARRAY_SIZE(sound_rates); i++) { - if (PsndRate >= sound_rates[i]) { - PsndRate = sound_rates[i]; + if (PicoIn.sndRate >= sound_rates[i]) { + PicoIn.sndRate = sound_rates[i]; break; } } @@ -763,11 +813,19 @@ void pemu_sound_stop(void) void pemu_forced_frame(int no_scale, int do_emu) { doing_bg_frame = 1; - PicoDrawSetOutBuf(g_screen_ptr, g_screen_width * 2); PicoDrawSetCallbacks(NULL, NULL); Pico.m.dirtyPal = 1; + PicoIn.opt &= ~POPT_DIS_32C_BORDER; + gp2x_current_bpp = 16; + // always render in screen 3 since menu uses 0-2 + g_screen_ptr = gp2x_screens[3]; - emu_cmn_forced_frame(no_scale, do_emu); + if (!no_scale) + no_scale = currentConfig.scaling == 
EOPT_SCALE_NONE; + emu_cmn_forced_frame(no_scale, do_emu, g_screen_ptr); + + if (is_1stblanked) + clear_1st_column(firstcol, firstline, linecount); g_menubg_src_ptr = g_screen_ptr; doing_bg_frame = 0; @@ -782,6 +840,7 @@ void plat_video_loop_prepare(void) // make sure we are in correct mode change_renderer(0); vid_reset_mode(); + rendstatus_old = -1; } void pemu_loop_prep(void) @@ -800,6 +859,12 @@ void pemu_loop_end(void) { pemu_sound_stop(); + if (g_screen_ptr == gp2x_screens[0]) { + /* currently on screen 3, which is needed for forced_frame */ + int size = gp2x_current_bpp / 8; + gp2x_memcpy_all_buffers(g_screen_ptr, 0, 320*240 * size); + gp2x_video_flip(); + } /* do one more frame for menu bg */ pemu_forced_frame(0, 1); } diff --git a/platform/gp2x/menu.c b/platform/gp2x/menu.c index 7d7ac93d..fc89014d 100644 --- a/platform/gp2x/menu.c +++ b/platform/gp2x/menu.c @@ -13,11 +13,12 @@ const char *men_scaling_opts[] = { "OFF", "software", "hardware", NULL }; mee_onoff ("Vsync", MA_OPT2_VSYNC, currentConfig.EmuOpt, EOPT_VSYNC), #define MENU_OPTIONS_ADV \ - mee_onoff ("Use second CPU for sound", MA_OPT_ARM940_SOUND, PicoOpt, POPT_EXT_FM), \ + mee_onoff ("Use second CPU for sound", MA_OPT_ARM940_SOUND, PicoIn.opt, POPT_EXT_FM), \ static menu_entry e_menu_adv_options[]; static menu_entry e_menu_gfx_options[]; +static menu_entry e_menu_sms_options[]; static menu_entry e_menu_options[]; static menu_entry e_menu_keyconfig[]; @@ -28,6 +29,7 @@ void gp2x_menu_init(void) me_enable(e_menu_gfx_options, MA_OPT_TEARING_FIX, 0); me_enable(e_menu_gfx_options, MA_OPT2_GAMMA, 0); me_enable(e_menu_gfx_options, MA_OPT2_A_SN_GAMMA, 0); + me_enable(e_menu_sms_options, MA_SMSOPT_GHOSTING, 0); switch (gp2x_dev_id) { case GP2X_DEV_GP2X: diff --git a/platform/gp2x/plat.c b/platform/gp2x/plat.c index 503e1496..96e9be09 100644 --- a/platform/gp2x/plat.c +++ b/platform/gp2x/plat.c @@ -16,7 +16,7 @@ #include "warm.h" #include "plat.h" -#include +#include /* GP2X local */ int gp2x_current_bpp; 
@@ -24,7 +24,7 @@ void *gp2x_screens[4]; void (*gp2x_video_flip)(void); void (*gp2x_video_flip2)(void); -void (*gp2x_video_changemode_ll)(int bpp); +void (*gp2x_video_changemode_ll)(int bpp, int is_pal); void (*gp2x_video_setpalette)(int *pal, int len); void (*gp2x_video_RGB_setscaling)(int ln_offs, int W, int H); void (*gp2x_video_wait_vsync)(void); @@ -71,9 +71,39 @@ static struct in_default_bind in_gp2x_defbinds[] = { 0, 0, 0 } }; -void gp2x_video_changemode(int bpp) +static const struct menu_keymap key_pbtn_map[] = { - gp2x_video_changemode_ll(bpp); + { KEY_UP, PBTN_UP }, + { KEY_DOWN, PBTN_DOWN }, + { KEY_LEFT, PBTN_LEFT }, + { KEY_RIGHT, PBTN_RIGHT }, + /* Caanoo */ + { BTN_THUMB2, PBTN_MOK }, + { BTN_THUMB, PBTN_MBACK }, + { BTN_TRIGGER, PBTN_MA2 }, + { BTN_TOP, PBTN_MA3 }, + { BTN_BASE, PBTN_MENU }, + { BTN_TOP2, PBTN_L }, + { BTN_PINKIE, PBTN_R }, + /* "normal" keyboards */ + { KEY_ENTER, PBTN_MOK }, + { KEY_ESC, PBTN_MBACK }, + { KEY_SEMICOLON, PBTN_MA2 }, + { KEY_APOSTROPHE, PBTN_MA3 }, + { KEY_BACKSLASH, PBTN_MENU }, + { KEY_LEFTBRACE, PBTN_L }, + { KEY_RIGHTBRACE, PBTN_R }, +}; + +static const struct in_pdata gp2x_evdev_pdata = { + .defbinds = in_evdev_defbinds, + .key_map = key_pbtn_map, + .kmap_size = sizeof(key_pbtn_map) / sizeof(key_pbtn_map[0]), +}; + +void gp2x_video_changemode(int bpp, int is_pal) +{ + gp2x_video_changemode_ll(bpp, is_pal); gp2x_current_bpp = bpp < 0 ? 
-bpp : bpp; } @@ -131,9 +161,8 @@ void plat_video_menu_enter(int is_rom_loaded) gp2x_video_flip2(); // switch to 16bpp - gp2x_video_changemode_ll(16); + gp2x_video_changemode_ll(16, 0); gp2x_video_RGB_setscaling(0, 320, 240); -printf("menu_enter\n"); } void plat_video_menu_begin(void) @@ -150,6 +179,16 @@ void plat_video_menu_leave(void) { } +void *plat_mem_get_for_drc(size_t size) +{ + return NULL; +} + +int plat_parse_arg(int argc, char *argv[], int *x) +{ + return 1; +} + void plat_early_init(void) { // just use gettimeofday until plat_init() @@ -174,6 +213,7 @@ void plat_init(void) g_menuscreen_w = 320; g_menuscreen_h = 240; + g_menuscreen_pp = g_menuscreen_w; gp2x_memset_all_buffers(0, 0, 320*240*2); gp2x_make_fb_bufferable(1); @@ -184,7 +224,7 @@ void plat_init(void) flip_after_sync = 1; gp2x_menu_init(); - in_evdev_init(in_evdev_defbinds); + in_evdev_init(&gp2x_evdev_pdata); in_gp2x_init(in_gp2x_defbinds); in_probe(); plat_target_setup_input(); diff --git a/platform/gp2x/plat.h b/platform/gp2x/plat.h index d46c4303..bb462878 100644 --- a/platform/gp2x/plat.h +++ b/platform/gp2x/plat.h @@ -5,13 +5,13 @@ extern int gp2x_current_bpp; extern void (*gp2x_video_flip)(void); extern void (*gp2x_video_flip2)(void); /* negative bpp means rotated mode (for Wiz) */ -extern void (*gp2x_video_changemode_ll)(int bpp); +extern void (*gp2x_video_changemode_ll)(int bpp, int is_pal); extern void (*gp2x_video_setpalette)(int *pal, int len); extern void (*gp2x_video_RGB_setscaling)(int ln_offs, int W, int H); extern void (*gp2x_video_wait_vsync)(void); /* ??? 
*/ -void gp2x_video_changemode(int bpp); +void gp2x_video_changemode(int bpp, int is_pal); void gp2x_memcpy_all_buffers(void *data, int offset, int len); void gp2x_memset_all_buffers(int offset, int byte, int len); @@ -19,7 +19,7 @@ void gp2x_memset_all_buffers(int offset, int byte, int len); void vid_mmsp2_init(void); void vid_mmsp2_finish(void); -void vid_pollux_init(); -void vid_pollux_finish(); +void vid_pollux_init(void); +void vid_pollux_finish(void); void gp2x_menu_init(void); diff --git a/platform/gp2x/vid_mmsp2.c b/platform/gp2x/vid_mmsp2.c index ea294867..c6c54138 100644 --- a/platform/gp2x/vid_mmsp2.c +++ b/platform/gp2x/vid_mmsp2.c @@ -62,9 +62,8 @@ static void gp2x_video_flip2_(void) g_screen_ptr = gp2x_screens[++screensel&1]; } -static void gp2x_video_changemode_ll_(int bpp) +static void gp2x_video_changemode_ll_(int bpp, int is_pal) { -printf("changemode %d\n", bpp); memregs[0x28DA>>1] = (((bpp+1)/8)<<9)|0xAB; /*8/15/16/24bpp...*/ memregs[0x290C>>1] = 320*((bpp+1)/8); /*line width in bytes*/ } @@ -163,7 +162,7 @@ void vid_mmsp2_init(void) void vid_mmsp2_finish(void) { gp2x_video_RGB_setscaling_(0, 320, 240); - gp2x_video_changemode_ll_(16); + gp2x_video_changemode_ll_(16, 0); memregs[0x290E>>1] = gp2x_screenaddr_old[0]; memregs[0x2910>>1] = gp2x_screenaddr_old[1]; diff --git a/platform/gp2x/vid_pollux.c b/platform/gp2x/vid_pollux.c index 308eb001..b0b28da4 100644 --- a/platform/gp2x/vid_pollux.c +++ b/platform/gp2x/vid_pollux.c @@ -34,31 +34,21 @@ #include "../common/arm_utils.h" #include "plat.h" -#define fb_buf_count 4 -static unsigned int fb_paddr[fb_buf_count]; +#define FB_BUF_COUNT 4 +#define FB_MEM_SIZE (320*240*2 * FB_BUF_COUNT) + +static unsigned int fb_paddr[FB_BUF_COUNT]; static int fb_work_buf; static int fbdev = -1; -static unsigned short memtimex_old[2]; -static int last_pal_setting = 0; - - -/* misc */ -static void pollux_set_fromenv(const char *env_var) -{ - const char *set_string; - set_string = getenv(env_var); - if (set_string) - 
pollux_set(memregs, set_string); - else - printf("env var %s not defined.\n", env_var); -} /* video stuff */ static void pollux_video_flip(int buf_count) { - memregl[0x406C>>2] = fb_paddr[fb_work_buf]; + memregl[0x406C>>2] = memregl[0x446C>>2] = fb_paddr[fb_work_buf]; memregl[0x4058>>2] |= 0x10; + memregl[0x4458>>2] |= 0x10; + fb_work_buf++; if (fb_work_buf >= buf_count) fb_work_buf = 0; @@ -67,7 +57,7 @@ static void pollux_video_flip(int buf_count) static void gp2x_video_flip_(void) { - pollux_video_flip(fb_buf_count); + pollux_video_flip(FB_BUF_COUNT); } /* doulblebuffered flip */ @@ -76,7 +66,7 @@ static void gp2x_video_flip2_(void) pollux_video_flip(2); } -static void gp2x_video_changemode_ll_(int bpp) +static void gp2x_video_changemode_ll_(int bpp, int is_pal) { static int prev_bpp = 0; int code = 0, bytes = 2; @@ -100,8 +90,9 @@ static void gp2x_video_changemode_ll_(int bpp) memregl[0x4000>>2] |= 1 << 3; /* the above ioctl resets LCD timings, so set them here */ - snprintf(buff, sizeof(buff), "POLLUX_LCD_TIMINGS_%s", last_pal_setting ? "PAL" : "NTSC"); - pollux_set_fromenv(buff); + snprintf(buff, sizeof(buff), "POLLUX_LCD_TIMINGS_%s", + is_pal ? "PAL" : "NTSC"); + pollux_set_fromenv(memregs, buff); switch (abs(bpp)) { @@ -121,12 +112,18 @@ static void gp2x_video_changemode_ll_(int bpp) return; } - memregl[0x405c>>2] = bytes; - memregl[0x4060>>2] = bytes * (bpp < 0 ? 240 : 320); + // program both MLCs so that TV-out works + memregl[0x405c>>2] = memregl[0x445c>>2] = bytes; + memregl[0x4060>>2] = memregl[0x4460>>2] = + bytes * (bpp < 0 ? 
240 : 320); r = memregl[0x4058>>2]; r = (r & 0xffff) | (code << 16) | 0x10; memregl[0x4058>>2] = r; + + r = memregl[0x4458>>2]; + r = (r & 0xffff) | (code << 16) | 0x10; + memregl[0x4458>>2] = r; } static void gp2x_video_setpalette_(int *pal, int len) @@ -153,26 +150,6 @@ static void gp2x_video_wait_vsync_(void) memregl[0x308c>>2] |= 1 << 10; } -/* RAM timings */ -static void set_ram_timings_(void) -{ - pollux_set_fromenv("POLLUX_RAM_TIMINGS"); -} - -static void unset_ram_timings_(void) -{ - int i; - - memregs[0x14802>>1] = memtimex_old[0]; - memregs[0x14804>>1] = memtimex_old[1] | 0x8000; - - for (i = 0; i < 0x100000; i++) - if (!(memregs[0x14804>>1] & 0x8000)) - break; - - printf("RAM timings reset to startup values.\n"); -} - void vid_pollux_init(void) { struct fb_fix_screeninfo fbfix; @@ -193,17 +170,17 @@ void vid_pollux_init(void) printf("framebuffer: \"%s\" @ %08lx\n", fbfix.id, fbfix.smem_start); fb_paddr[0] = fbfix.smem_start; - gp2x_screens[0] = mmap(0, 320*240*2*fb_buf_count, PROT_READ|PROT_WRITE, + gp2x_screens[0] = mmap(0, FB_MEM_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, memdev, fb_paddr[0]); if (gp2x_screens[0] == MAP_FAILED) { perror("mmap(gp2x_screens) failed"); exit(1); } - memset(gp2x_screens[0], 0, 320*240*2*fb_buf_count); + memset(gp2x_screens[0], 0, FB_MEM_SIZE); printf(" %p -> %08x\n", gp2x_screens[0], fb_paddr[0]); - for (i = 1; i < fb_buf_count; i++) + for (i = 1; i < FB_BUF_COUNT; i++) { fb_paddr[i] = fb_paddr[i-1] + 320*240*2; gp2x_screens[i] = (char *)gp2x_screens[i-1] + 320*240*2; @@ -212,8 +189,6 @@ void vid_pollux_init(void) fb_work_buf = 0; g_screen_ptr = gp2x_screens[0]; - set_ram_timings_(); - gp2x_video_flip = gp2x_video_flip_; gp2x_video_flip2 = gp2x_video_flip2_; gp2x_video_changemode_ll = gp2x_video_changemode_ll_; @@ -224,9 +199,8 @@ void vid_pollux_init(void) void vid_pollux_finish(void) { - munmap(gp2x_screens[0], 320*240*2 * fb_buf_count); + memset(gp2x_screens[0], 0, FB_MEM_SIZE); + munmap(gp2x_screens[0], FB_MEM_SIZE); 
close(fbdev); fbdev = -1; - - unset_ram_timings_(); } diff --git a/platform/gp2x/warm_2.4.25.o b/platform/gp2x/warm_2.4.25.o new file mode 100644 index 00000000..23a80b81 Binary files /dev/null and b/platform/gp2x/warm_2.4.25.o differ diff --git a/platform/gp2x/warm_2.4.26-open2x.o b/platform/gp2x/warm_2.4.26-open2x.o new file mode 100644 index 00000000..5bcb338b Binary files /dev/null and b/platform/gp2x/warm_2.4.26-open2x.o differ diff --git a/platform/gp2x/warm_2.6.24.ko b/platform/gp2x/warm_2.6.24.ko new file mode 100644 index 00000000..d2b8dc4d Binary files /dev/null and b/platform/gp2x/warm_2.6.24.ko differ diff --git a/platform/libpicofe b/platform/libpicofe index 1bc471eb..45e44c2a 160000 --- a/platform/libpicofe +++ b/platform/libpicofe @@ -1 +1 @@ -Subproject commit 1bc471ebf1c85cf78f1862f5596a76f051e7112d +Subproject commit 45e44c2aab97cb4d505c53048c383afdd7814f88 diff --git a/platform/libretro.c b/platform/libretro.c deleted file mode 100644 index f3252ab5..00000000 --- a/platform/libretro.c +++ /dev/null @@ -1,901 +0,0 @@ -/* - * libretro core glue for PicoDrive - * (C) notaz, 2013 - * - * This work is licensed under the terms of MAME license. - * See COPYING file in the top-level directory. 
- */ - -#define _GNU_SOURCE 1 // mremap -#include -#include -#include -#ifndef _WIN32 -#include -#else -#include -#include -#include -#endif -#include -#ifdef __MACH__ -#include -#endif - -#include -#include -#include "common/input_pico.h" -#include "common/version.h" -#include "libretro.h" - -static retro_video_refresh_t video_cb; -static retro_input_poll_t input_poll_cb; -static retro_input_state_t input_state_cb; -static retro_environment_t environ_cb; -static retro_audio_sample_batch_t audio_batch_cb; - -static FILE *emu_log; - -#define VOUT_MAX_WIDTH 320 -#define VOUT_MAX_HEIGHT 240 -static void *vout_buf; -static int vout_width, vout_height; - -static short __attribute__((aligned(4))) sndBuffer[2*44100/50]; - -static void snd_write(int len); - -#ifdef _WIN32 -#define SLASH '\\' -#else -#define SLASH '/' -#endif - -/* functions called by the core */ - -void cache_flush_d_inval_i(void *start, void *end) -{ -#ifdef __arm__ -#if defined(__BLACKBERRY_QNX__) - msync(start, end - start, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); -#elif defined(__MACH__) - size_t len = (char *)end - (char *)start; - sys_dcache_flush(start, len); - sys_icache_invalidate(start, len); -#else - __clear_cache(start, end); -#endif -#endif -} - -#ifdef _WIN32 -/* mmap() replacement for Windows - * - * Author: Mike Frysinger - * Placed into the public domain - */ - -/* References: - * CreateFileMapping: http://msdn.microsoft.com/en-us/library/aa366537(VS.85).aspx - * CloseHandle: http://msdn.microsoft.com/en-us/library/ms724211(VS.85).aspx - * MapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366761(VS.85).aspx - * UnmapViewOfFile: http://msdn.microsoft.com/en-us/library/aa366882(VS.85).aspx - */ - -#define PROT_READ 0x1 -#define PROT_WRITE 0x2 -/* This flag is only available in WinXP+ */ -#ifdef FILE_MAP_EXECUTE -#define PROT_EXEC 0x4 -#else -#define PROT_EXEC 0x0 -#define FILE_MAP_EXECUTE 0 -#endif - -#define MAP_SHARED 0x01 -#define MAP_PRIVATE 0x02 -#define MAP_ANONYMOUS 
0x20 -#define MAP_ANON MAP_ANONYMOUS -#define MAP_FAILED ((void *) -1) - -#ifdef __USE_FILE_OFFSET64 -# define DWORD_HI(x) (x >> 32) -# define DWORD_LO(x) ((x) & 0xffffffff) -#else -# define DWORD_HI(x) (0) -# define DWORD_LO(x) (x) -#endif - -static void *mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset) -{ - if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) - return MAP_FAILED; - if (fd == -1) { - if (!(flags & MAP_ANON) || offset) - return MAP_FAILED; - } else if (flags & MAP_ANON) - return MAP_FAILED; - - DWORD flProtect; - if (prot & PROT_WRITE) { - if (prot & PROT_EXEC) - flProtect = PAGE_EXECUTE_READWRITE; - else - flProtect = PAGE_READWRITE; - } else if (prot & PROT_EXEC) { - if (prot & PROT_READ) - flProtect = PAGE_EXECUTE_READ; - else if (prot & PROT_EXEC) - flProtect = PAGE_EXECUTE; - } else - flProtect = PAGE_READONLY; - - off_t end = length + offset; - HANDLE mmap_fd, h; - if (fd == -1) - mmap_fd = INVALID_HANDLE_VALUE; - else - mmap_fd = (HANDLE)_get_osfhandle(fd); - h = CreateFileMapping(mmap_fd, NULL, flProtect, DWORD_HI(end), DWORD_LO(end), NULL); - if (h == NULL) - return MAP_FAILED; - - DWORD dwDesiredAccess; - if (prot & PROT_WRITE) - dwDesiredAccess = FILE_MAP_WRITE; - else - dwDesiredAccess = FILE_MAP_READ; - if (prot & PROT_EXEC) - dwDesiredAccess |= FILE_MAP_EXECUTE; - if (flags & MAP_PRIVATE) - dwDesiredAccess |= FILE_MAP_COPY; - void *ret = MapViewOfFile(h, dwDesiredAccess, DWORD_HI(offset), DWORD_LO(offset), length); - if (ret == NULL) { - CloseHandle(h); - ret = MAP_FAILED; - } - return ret; -} - -static void munmap(void *addr, size_t length) -{ - UnmapViewOfFile(addr); - /* ruh-ro, we leaked handle from CreateFileMapping() ... 
*/ -} -#endif - -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - -void *plat_mmap(unsigned long addr, size_t size, int need_exec, int is_fixed) -{ - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - void *req, *ret; - - req = (void *)addr; - ret = mmap(req, size, PROT_READ | PROT_WRITE, flags, -1, 0); - if (ret == MAP_FAILED) { - lprintf("mmap(%08lx, %zd) failed: %d\n", addr, size, errno); - return NULL; - } - - if (addr != 0 && ret != (void *)addr) { - lprintf("warning: wanted to map @%08lx, got %p\n", - addr, ret); - - if (is_fixed) { - munmap(ret, size); - return NULL; - } - } - - return ret; -} - -void *plat_mremap(void *ptr, size_t oldsize, size_t newsize) -{ -#ifdef __linux__ - void *ret = mremap(ptr, oldsize, newsize, 0); - if (ret == MAP_FAILED) - return NULL; - - return ret; -#else - void *tmp, *ret; - size_t preserve_size; - - preserve_size = oldsize; - if (preserve_size > newsize) - preserve_size = newsize; - tmp = malloc(preserve_size); - if (tmp == NULL) - return NULL; - memcpy(tmp, ptr, preserve_size); - - munmap(ptr, oldsize); - ret = mmap(ptr, newsize, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (ret == MAP_FAILED) { - free(tmp); - return NULL; - } - memcpy(ret, tmp, preserve_size); - free(tmp); - return ret; -#endif -} - -void plat_munmap(void *ptr, size_t size) -{ - if (ptr != NULL) - munmap(ptr, size); -} - -int plat_mem_set_exec(void *ptr, size_t size) -{ -#ifdef _WIN32 - int ret = VirtualProtect(ptr,size,PAGE_EXECUTE_READWRITE,0); - if (ret == 0) - lprintf("mprotect(%p, %zd) failed: %d\n", ptr, size, 0); -#else - int ret = mprotect(ptr, size, PROT_READ | PROT_WRITE | PROT_EXEC); - if (ret != 0) - lprintf("mprotect(%p, %zd) failed: %d\n", ptr, size, errno); -#endif - return ret; -} - -void emu_video_mode_change(int start_line, int line_count, int is_32cols) -{ - memset(vout_buf, 0, 320 * 240 * 2); - vout_width = is_32cols ? 
256 : 320; - PicoDrawSetOutBuf(vout_buf, vout_width * 2); -} - -void emu_32x_startup(void) -{ -} - -#ifndef ANDROID - -void lprintf(const char *fmt, ...) -{ - va_list list; - - va_start(list, fmt); - fprintf(emu_log, "PicoDrive: "); - vfprintf(emu_log, fmt, list); - va_end(list); - fflush(emu_log); -} - -#else - -#include - -void lprintf(const char *fmt, ...) -{ - va_list list; - - va_start(list, fmt); - __android_log_vprint(ANDROID_LOG_INFO, "PicoDrive", fmt, list); - va_end(list); -} - -#endif - -/* libretro */ -void retro_set_environment(retro_environment_t cb) -{ - static const struct retro_variable vars[] = { - //{ "region", "Region; Auto|NTSC|PAL" }, - { "picodrive_input1", "Input device 1; 3 button pad|6 button pad|None" }, - { "picodrive_input2", "Input device 2; 3 button pad|6 button pad|None" }, - { "picodrive_sprlim", "No sprite limit; disabled|enabled" }, - { "picodrive_ramcart", "MegaCD RAM cart; disabled|enabled" }, -#ifdef DRC_SH2 - { "picodrive_drc", "Dynamic recompilers; enabled|disabled" }, -#endif - { NULL, NULL }, - }; - - environ_cb = cb; - - cb(RETRO_ENVIRONMENT_SET_VARIABLES, (void *)vars); -} - -void retro_set_video_refresh(retro_video_refresh_t cb) { video_cb = cb; } -void retro_set_audio_sample(retro_audio_sample_t cb) { (void)cb; } -void retro_set_audio_sample_batch(retro_audio_sample_batch_t cb) { audio_batch_cb = cb; } -void retro_set_input_poll(retro_input_poll_t cb) { input_poll_cb = cb; } -void retro_set_input_state(retro_input_state_t cb) { input_state_cb = cb; } - -unsigned retro_api_version(void) -{ - return RETRO_API_VERSION; -} - -void retro_set_controller_port_device(unsigned port, unsigned device) -{ -} - -void retro_get_system_info(struct retro_system_info *info) -{ - memset(info, 0, sizeof(*info)); - info->library_name = "PicoDrive"; - info->library_version = VERSION; - info->valid_extensions = "bin|gen|smd|md|32x|cue|iso|sms"; - info->need_fullpath = true; -} - -void retro_get_system_av_info(struct retro_system_av_info 
*info) -{ - memset(info, 0, sizeof(*info)); - info->timing.fps = Pico.m.pal ? 50 : 60; - info->timing.sample_rate = 44100; - info->geometry.base_width = 320; - info->geometry.base_height = 240; - info->geometry.max_width = VOUT_MAX_WIDTH; - info->geometry.max_height = VOUT_MAX_HEIGHT; - info->geometry.aspect_ratio = 4.0 / 3.0; -} - -/* savestates */ -struct savestate_state { - const char *load_buf; - char *save_buf; - size_t size; - size_t pos; -}; - -size_t state_read(void *p, size_t size, size_t nmemb, void *file) -{ - struct savestate_state *state = file; - size_t bsize = size * nmemb; - - if (state->pos + bsize > state->size) { - lprintf("savestate error: %u/%u\n", - state->pos + bsize, state->size); - bsize = state->size - state->pos; - if ((int)bsize <= 0) - return 0; - } - - memcpy(p, state->load_buf + state->pos, bsize); - state->pos += bsize; - return bsize; -} - -size_t state_write(void *p, size_t size, size_t nmemb, void *file) -{ - struct savestate_state *state = file; - size_t bsize = size * nmemb; - - if (state->pos + bsize > state->size) { - lprintf("savestate error: %u/%u\n", - state->pos + bsize, state->size); - bsize = state->size - state->pos; - if ((int)bsize <= 0) - return 0; - } - - memcpy(state->save_buf + state->pos, p, bsize); - state->pos += bsize; - return bsize; -} - -size_t state_skip(void *p, size_t size, size_t nmemb, void *file) -{ - struct savestate_state *state = file; - size_t bsize = size * nmemb; - - state->pos += bsize; - return bsize; -} - -size_t state_eof(void *file) -{ - struct savestate_state *state = file; - - return state->pos >= state->size; -} - -int state_fseek(void *file, long offset, int whence) -{ - struct savestate_state *state = file; - - switch (whence) { - case SEEK_SET: - state->pos = offset; - break; - case SEEK_CUR: - state->pos += offset; - break; - case SEEK_END: - state->pos = state->size + offset; - break; - } - return (int)state->pos; -} - -/* savestate sizes vary wildly depending if cd/32x or - * 
carthw is active, so run the whole thing to get size */ -size_t retro_serialize_size(void) -{ - struct savestate_state state = { 0, }; - int ret; - - ret = PicoStateFP(&state, 1, NULL, state_skip, NULL, state_fseek); - if (ret != 0) - return 0; - - return state.pos; -} - -bool retro_serialize(void *data, size_t size) -{ - struct savestate_state state = { 0, }; - int ret; - - state.save_buf = data; - state.size = size; - state.pos = 0; - - ret = PicoStateFP(&state, 1, NULL, state_write, - NULL, state_fseek); - return ret == 0; -} - -bool retro_unserialize(const void *data, size_t size) -{ - struct savestate_state state = { 0, }; - int ret; - - state.load_buf = data; - state.size = size; - state.pos = 0; - - ret = PicoStateFP(&state, 0, state_read, NULL, - state_eof, state_fseek); - return ret == 0; -} - -/* cheats - TODO */ -void retro_cheat_reset(void) -{ -} - -void retro_cheat_set(unsigned index, bool enabled, const char *code) -{ -} - -/* multidisk support */ -static bool disk_ejected; -static unsigned int disk_current_index; -static unsigned int disk_count; -static struct disks_state { - char *fname; -} disks[8]; - -static bool disk_set_eject_state(bool ejected) -{ - // TODO? - disk_ejected = ejected; - return true; -} - -static bool disk_get_eject_state(void) -{ - return disk_ejected; -} - -static unsigned int disk_get_image_index(void) -{ - return disk_current_index; -} - -static bool disk_set_image_index(unsigned int index) -{ - cd_img_type cd_type; - int ret; - - if (index >= sizeof(disks) / sizeof(disks[0])) - return false; - - if (disks[index].fname == NULL) { - lprintf("missing disk #%u\n", index); - - // RetroArch specifies "no disk" with index == count, - // so don't fail here.. 
- disk_current_index = index; - return true; - } - - lprintf("switching to disk %u: \"%s\"\n", index, - disks[index].fname); - - ret = -1; - cd_type = PicoCdCheck(disks[index].fname, NULL); - if (cd_type != CIT_NOT_CD) - ret = Insert_CD(disks[index].fname, cd_type); - if (ret != 0) { - lprintf("Load failed, invalid CD image?\n"); - return 0; - } - - disk_current_index = index; - return true; -} - -static unsigned int disk_get_num_images(void) -{ - return disk_count; -} - -static bool disk_replace_image_index(unsigned index, - const struct retro_game_info *info) -{ - bool ret = true; - - if (index >= sizeof(disks) / sizeof(disks[0])) - return false; - - if (disks[index].fname != NULL) - free(disks[index].fname); - disks[index].fname = NULL; - - if (info != NULL) { - disks[index].fname = strdup(info->path); - if (index == disk_current_index) - ret = disk_set_image_index(index); - } - - return ret; -} - -static bool disk_add_image_index(void) -{ - if (disk_count >= sizeof(disks) / sizeof(disks[0])) - return false; - - disk_count++; - return true; -} - -static struct retro_disk_control_callback disk_control = { - .set_eject_state = disk_set_eject_state, - .get_eject_state = disk_get_eject_state, - .get_image_index = disk_get_image_index, - .set_image_index = disk_set_image_index, - .get_num_images = disk_get_num_images, - .replace_image_index = disk_replace_image_index, - .add_image_index = disk_add_image_index, -}; - -static void disk_tray_open(void) -{ - lprintf("cd tray open\n"); - disk_ejected = 1; -} - -static void disk_tray_close(void) -{ - lprintf("cd tray close\n"); - disk_ejected = 0; -} - - -static const char * const biosfiles_us[] = { - "us_scd2_9306", "SegaCDBIOS9303", "us_scd1_9210", "bios_CD_U" -}; -static const char * const biosfiles_eu[] = { - "eu_mcd2_9306", "eu_mcd2_9303", "eu_mcd1_9210", "bios_CD_E" -}; -static const char * const biosfiles_jp[] = { - "jp_mcd2_921222", "jp_mcd1_9112", "jp_mcd1_9111", "bios_CD_J" -}; - -static void 
make_system_path(char *buf, size_t buf_size, - const char *name, const char *ext) -{ - const char *dir = NULL; - - if (environ_cb(RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY, &dir) && dir) { - snprintf(buf, buf_size, "%s%c%s%s", dir, SLASH, name, ext); - } - else { - snprintf(buf, buf_size, "%s%s", name, ext); - } -} - -static const char *find_bios(int *region, const char *cd_fname) -{ - const char * const *files; - static char path[256]; - int i, count; - FILE *f = NULL; - - if (*region == 4) { // US - files = biosfiles_us; - count = sizeof(biosfiles_us) / sizeof(char *); - } else if (*region == 8) { // EU - files = biosfiles_eu; - count = sizeof(biosfiles_eu) / sizeof(char *); - } else if (*region == 1 || *region == 2) { - files = biosfiles_jp; - count = sizeof(biosfiles_jp) / sizeof(char *); - } else { - return NULL; - } - - for (i = 0; i < count; i++) - { - make_system_path(path, sizeof(path), files[i], ".bin"); - f = fopen(path, "rb"); - if (f != NULL) - break; - - make_system_path(path, sizeof(path), files[i], ".zip"); - f = fopen(path, "rb"); - if (f != NULL) - break; - } - - if (f != NULL) { - lprintf("using bios: %s\n", path); - fclose(f); - return path; - } - - return NULL; -} - -bool retro_load_game(const struct retro_game_info *info) -{ - enum media_type_e media_type; - static char carthw_path[256]; - size_t i; - - enum retro_pixel_format fmt = RETRO_PIXEL_FORMAT_RGB565; - if (!environ_cb(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &fmt)) { - lprintf("RGB565 support required, sorry\n"); - return false; - } - - if (info == NULL || info->path == NULL) { - lprintf("info->path required\n"); - return false; - } - - for (i = 0; i < sizeof(disks) / sizeof(disks[0]); i++) { - if (disks[i].fname != NULL) { - free(disks[i].fname); - disks[i].fname = NULL; - } - } - - disk_current_index = 0; - disk_count = 1; - disks[0].fname = strdup(info->path); - - make_system_path(carthw_path, sizeof(carthw_path), "carthw", ".cfg"); - - media_type = PicoLoadMedia(info->path, carthw_path, - 
find_bios, NULL); - - switch (media_type) { - case PM_BAD_DETECT: - lprintf("Failed to detect ROM/CD image type.\n"); - return false; - case PM_BAD_CD: - lprintf("Invalid CD image\n"); - return false; - case PM_BAD_CD_NO_BIOS: - lprintf("Missing BIOS\n"); - return false; - case PM_ERROR: - lprintf("Load error\n"); - return false; - default: - break; - } - - PicoLoopPrepare(); - - PicoWriteSound = snd_write; - memset(sndBuffer, 0, sizeof(sndBuffer)); - PsndOut = sndBuffer; - PsndRerate(0); - - return true; -} - -bool retro_load_game_special(unsigned game_type, const struct retro_game_info *info, size_t num_info) -{ - return false; -} - -void retro_unload_game(void) -{ -} - -unsigned retro_get_region(void) -{ - return Pico.m.pal ? RETRO_REGION_PAL : RETRO_REGION_NTSC; -} - -void *retro_get_memory_data(unsigned id) -{ - if (id != RETRO_MEMORY_SAVE_RAM) - return NULL; - - if (PicoAHW & PAHW_MCD) - return Pico_mcd->bram; - else - return SRam.data; -} - -size_t retro_get_memory_size(unsigned id) -{ - if (id != RETRO_MEMORY_SAVE_RAM) - return 0; - - if (PicoAHW & PAHW_MCD) - // bram - return 0x2000; - else - return SRam.size; -} - -void retro_reset(void) -{ - PicoReset(); -} - -static const unsigned short retro_pico_map[] = { - [RETRO_DEVICE_ID_JOYPAD_B] = 1 << GBTN_B, - [RETRO_DEVICE_ID_JOYPAD_Y] = 1 << GBTN_A, - [RETRO_DEVICE_ID_JOYPAD_SELECT] = 1 << GBTN_MODE, - [RETRO_DEVICE_ID_JOYPAD_START] = 1 << GBTN_START, - [RETRO_DEVICE_ID_JOYPAD_UP] = 1 << GBTN_UP, - [RETRO_DEVICE_ID_JOYPAD_DOWN] = 1 << GBTN_DOWN, - [RETRO_DEVICE_ID_JOYPAD_LEFT] = 1 << GBTN_LEFT, - [RETRO_DEVICE_ID_JOYPAD_RIGHT] = 1 << GBTN_RIGHT, - [RETRO_DEVICE_ID_JOYPAD_A] = 1 << GBTN_C, - [RETRO_DEVICE_ID_JOYPAD_X] = 1 << GBTN_Y, - [RETRO_DEVICE_ID_JOYPAD_L] = 1 << GBTN_X, - [RETRO_DEVICE_ID_JOYPAD_R] = 1 << GBTN_Z, -}; -#define RETRO_PICO_MAP_LEN (sizeof(retro_pico_map) / sizeof(retro_pico_map[0])) - -static void snd_write(int len) -{ - audio_batch_cb(PsndOut, len / 4); -} - -static enum input_device 
input_name_to_val(const char *name) -{ - if (strcmp(name, "3 button pad") == 0) - return PICO_INPUT_PAD_3BTN; - if (strcmp(name, "6 button pad") == 0) - return PICO_INPUT_PAD_6BTN; - if (strcmp(name, "None") == 0) - return PICO_INPUT_NOTHING; - - lprintf("invalid picodrive_input: '%s'\n", name); - return PICO_INPUT_PAD_3BTN; -} - -static void update_variables(void) -{ - struct retro_variable var; - - var.value = NULL; - var.key = "picodrive_input1"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) - PicoSetInputDevice(0, input_name_to_val(var.value)); - - var.value = NULL; - var.key = "picodrive_input2"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) - PicoSetInputDevice(1, input_name_to_val(var.value)); - - var.value = NULL; - var.key = "picodrive_sprlim"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - if (strcmp(var.value, "enabled") == 0) - PicoOpt |= POPT_DIS_SPRITE_LIM; - else - PicoOpt &= ~POPT_DIS_SPRITE_LIM; - } - - var.value = NULL; - var.key = "picodrive_ramcart"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - if (strcmp(var.value, "enabled") == 0) - PicoOpt |= POPT_EN_MCD_RAMCART; - else - PicoOpt &= ~POPT_EN_MCD_RAMCART; - } - -#ifdef DRC_SH2 - var.value = NULL; - var.key = "picodrive_drc"; - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value) { - if (strcmp(var.value, "enabled") == 0) - PicoOpt |= POPT_EN_DRC; - else - PicoOpt &= ~POPT_EN_DRC; - } -#endif -} - -void retro_run(void) -{ - bool updated = false; - int pad, i; - - if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE, &updated) && updated) - update_variables(); - - input_poll_cb(); - - PicoPad[0] = PicoPad[1] = 0; - for (pad = 0; pad < 2; pad++) - for (i = 0; i < RETRO_PICO_MAP_LEN; i++) - if (input_state_cb(pad, RETRO_DEVICE_JOYPAD, 0, i)) - PicoPad[pad] |= retro_pico_map[i]; - - PicoFrame(); - - video_cb(vout_buf, vout_width, vout_height, vout_width * 2); -} - -void retro_init(void) 
-{ - int level; - -#ifdef IOS - emu_log = fopen("/User/Documents/PicoDrive.log", "w"); - if (emu_log == NULL) - emu_log = fopen("PicoDrive.log", "w"); - if (emu_log == NULL) -#endif - emu_log = stdout; - - level = 0; - environ_cb(RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL, &level); - - environ_cb(RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE, &disk_control); - - PicoOpt = POPT_EN_STEREO|POPT_EN_FM|POPT_EN_PSG|POPT_EN_Z80 - | POPT_EN_MCD_PCM|POPT_EN_MCD_CDDA|POPT_EN_MCD_GFX - | POPT_EN_32X|POPT_EN_PWM - | POPT_ACC_SPRITES|POPT_DIS_32C_BORDER; -#ifdef __arm__ - PicoOpt |= POPT_EN_DRC; -#endif - PsndRate = 44100; - PicoAutoRgnOrder = 0x184; // US, EU, JP - PicoCDBuffers = 0; - - vout_width = 320; - vout_height = 240; - vout_buf = malloc(VOUT_MAX_WIDTH * VOUT_MAX_HEIGHT * 2); - - PicoInit(); - PicoDrawSetOutFormat(PDF_RGB555, 0); - PicoDrawSetOutBuf(vout_buf, vout_width * 2); - - //PicoMessage = plat_status_msg_busy_next; - PicoMCDopenTray = disk_tray_open; - PicoMCDcloseTray = disk_tray_close; - - update_variables(); -} - -void retro_deinit(void) -{ - PicoExit(); -} diff --git a/platform/libretro.h b/platform/libretro.h deleted file mode 100644 index ff4f4fd9..00000000 --- a/platform/libretro.h +++ /dev/null @@ -1,787 +0,0 @@ -/* Copyright (C) 2010-2013 The RetroArch team - * - * --------------------------------------------------------------------------------------- - * The following license statement only applies to this libretro API header (libretro.h). 
- * --------------------------------------------------------------------------------------- - * - * Permission is hereby granted, free of charge, - * to any person obtaining a copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, - * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef LIBRETRO_H__ -#define LIBRETRO_H__ - -#include -#include -#include - -// Hack applied for MSVC when compiling in C89 mode as it isn't C99 compliant. -#ifdef __cplusplus -extern "C" { -#else -#if defined(_MSC_VER) && !defined(SN_TARGET_PS3) && !defined(__cplusplus) -#define bool unsigned char -#define true 1 -#define false 0 -#else -#include -#endif -#endif - -// Used for checking API/ABI mismatches that can break libretro implementations. -// It is not incremented for compatible changes to the API. -#define RETRO_API_VERSION 1 - -// Libretro's fundamental device abstractions. -#define RETRO_DEVICE_MASK 0xff -#define RETRO_DEVICE_NONE 0 - -// The JOYPAD is called RetroPad. It is essentially a Super Nintendo controller, -// but with additional L2/R2/L3/R3 buttons, similar to a PS1 DualShock. 
-#define RETRO_DEVICE_JOYPAD 1 - -// The mouse is a simple mouse, similar to Super Nintendo's mouse. -// X and Y coordinates are reported relatively to last poll (poll callback). -// It is up to the libretro implementation to keep track of where the mouse pointer is supposed to be on the screen. -// The frontend must make sure not to interfere with its own hardware mouse pointer. -#define RETRO_DEVICE_MOUSE 2 - -// KEYBOARD device lets one poll for raw key pressed. -// It is poll based, so input callback will return with the current pressed state. -#define RETRO_DEVICE_KEYBOARD 3 - -// Lightgun X/Y coordinates are reported relatively to last poll, similar to mouse. -#define RETRO_DEVICE_LIGHTGUN 4 - -// The ANALOG device is an extension to JOYPAD (RetroPad). -// Similar to DualShock it adds two analog sticks. -// This is treated as a separate device type as it returns values in the full analog range -// of [-0x8000, 0x7fff]. Positive X axis is right. Positive Y axis is down. -// Only use ANALOG type when polling for analog values of the axes. -#define RETRO_DEVICE_ANALOG 5 - -// Abstracts the concept of a pointing mechanism, e.g. touch. -// This allows libretro to query in absolute coordinates where on the screen a mouse (or something similar) is being placed. -// For a touch centric device, coordinates reported are the coordinates of the press. -// -// Coordinates in X and Y are reported as: -// [-0x7fff, 0x7fff]: -0x7fff corresponds to the far left/top of the screen, -// and 0x7fff corresponds to the far right/bottom of the screen. -// The "screen" is here defined as area that is passed to the frontend and later displayed on the monitor. -// The frontend is free to scale/resize this screen as it sees fit, however, -// (X, Y) = (-0x7fff, -0x7fff) will correspond to the top-left pixel of the game image, etc. -// -// To check if the pointer coordinates are valid (e.g. a touch display actually being touched), -// PRESSED returns 1 or 0. 
-// If using a mouse, PRESSED will usually correspond to the left mouse button. -// PRESSED will only return 1 if the pointer is inside the game screen. -// -// For multi-touch, the index variable can be used to successively query more presses. -// If index = 0 returns true for _PRESSED, coordinates can be extracted -// with _X, _Y for index = 0. One can then query _PRESSED, _X, _Y with index = 1, and so on. -// Eventually _PRESSED will return false for an index. No further presses are registered at this point. -#define RETRO_DEVICE_POINTER 6 - -// These device types are specializations of the base types above. -// They should only be used in retro_set_controller_type() to inform libretro implementations -// about use of a very specific device type. -// -// In input state callback, however, only the base type should be used in the 'device' field. -#define RETRO_DEVICE_JOYPAD_MULTITAP ((1 << 8) | RETRO_DEVICE_JOYPAD) -#define RETRO_DEVICE_LIGHTGUN_SUPER_SCOPE ((1 << 8) | RETRO_DEVICE_LIGHTGUN) -#define RETRO_DEVICE_LIGHTGUN_JUSTIFIER ((2 << 8) | RETRO_DEVICE_LIGHTGUN) -#define RETRO_DEVICE_LIGHTGUN_JUSTIFIERS ((3 << 8) | RETRO_DEVICE_LIGHTGUN) - -// Buttons for the RetroPad (JOYPAD). -// The placement of these is equivalent to placements on the Super Nintendo controller. -// L2/R2/L3/R3 buttons correspond to the PS1 DualShock. -#define RETRO_DEVICE_ID_JOYPAD_B 0 -#define RETRO_DEVICE_ID_JOYPAD_Y 1 -#define RETRO_DEVICE_ID_JOYPAD_SELECT 2 -#define RETRO_DEVICE_ID_JOYPAD_START 3 -#define RETRO_DEVICE_ID_JOYPAD_UP 4 -#define RETRO_DEVICE_ID_JOYPAD_DOWN 5 -#define RETRO_DEVICE_ID_JOYPAD_LEFT 6 -#define RETRO_DEVICE_ID_JOYPAD_RIGHT 7 -#define RETRO_DEVICE_ID_JOYPAD_A 8 -#define RETRO_DEVICE_ID_JOYPAD_X 9 -#define RETRO_DEVICE_ID_JOYPAD_L 10 -#define RETRO_DEVICE_ID_JOYPAD_R 11 -#define RETRO_DEVICE_ID_JOYPAD_L2 12 -#define RETRO_DEVICE_ID_JOYPAD_R2 13 -#define RETRO_DEVICE_ID_JOYPAD_L3 14 -#define RETRO_DEVICE_ID_JOYPAD_R3 15 - -// Index / Id values for ANALOG device. 
-#define RETRO_DEVICE_INDEX_ANALOG_LEFT 0 -#define RETRO_DEVICE_INDEX_ANALOG_RIGHT 1 -#define RETRO_DEVICE_ID_ANALOG_X 0 -#define RETRO_DEVICE_ID_ANALOG_Y 1 - -// Id values for MOUSE. -#define RETRO_DEVICE_ID_MOUSE_X 0 -#define RETRO_DEVICE_ID_MOUSE_Y 1 -#define RETRO_DEVICE_ID_MOUSE_LEFT 2 -#define RETRO_DEVICE_ID_MOUSE_RIGHT 3 - -// Id values for LIGHTGUN types. -#define RETRO_DEVICE_ID_LIGHTGUN_X 0 -#define RETRO_DEVICE_ID_LIGHTGUN_Y 1 -#define RETRO_DEVICE_ID_LIGHTGUN_TRIGGER 2 -#define RETRO_DEVICE_ID_LIGHTGUN_CURSOR 3 -#define RETRO_DEVICE_ID_LIGHTGUN_TURBO 4 -#define RETRO_DEVICE_ID_LIGHTGUN_PAUSE 5 -#define RETRO_DEVICE_ID_LIGHTGUN_START 6 - -// Id values for POINTER. -#define RETRO_DEVICE_ID_POINTER_X 0 -#define RETRO_DEVICE_ID_POINTER_Y 1 -#define RETRO_DEVICE_ID_POINTER_PRESSED 2 - -// Returned from retro_get_region(). -#define RETRO_REGION_NTSC 0 -#define RETRO_REGION_PAL 1 - -// Passed to retro_get_memory_data/size(). -// If the memory type doesn't apply to the implementation NULL/0 can be returned. -#define RETRO_MEMORY_MASK 0xff - -// Regular save ram. This ram is usually found on a game cartridge, backed up by a battery. -// If save game data is too complex for a single memory buffer, -// the SYSTEM_DIRECTORY environment callback can be used. -#define RETRO_MEMORY_SAVE_RAM 0 - -// Some games have a built-in clock to keep track of time. -// This memory is usually just a couple of bytes to keep track of time. -#define RETRO_MEMORY_RTC 1 - -// System ram lets a frontend peek into a game systems main RAM. -#define RETRO_MEMORY_SYSTEM_RAM 2 - -// Video ram lets a frontend peek into a game systems video RAM (VRAM). -#define RETRO_MEMORY_VIDEO_RAM 3 - -// Special memory types. 
-#define RETRO_MEMORY_SNES_BSX_RAM ((1 << 8) | RETRO_MEMORY_SAVE_RAM) -#define RETRO_MEMORY_SNES_BSX_PRAM ((2 << 8) | RETRO_MEMORY_SAVE_RAM) -#define RETRO_MEMORY_SNES_SUFAMI_TURBO_A_RAM ((3 << 8) | RETRO_MEMORY_SAVE_RAM) -#define RETRO_MEMORY_SNES_SUFAMI_TURBO_B_RAM ((4 << 8) | RETRO_MEMORY_SAVE_RAM) -#define RETRO_MEMORY_SNES_GAME_BOY_RAM ((5 << 8) | RETRO_MEMORY_SAVE_RAM) -#define RETRO_MEMORY_SNES_GAME_BOY_RTC ((6 << 8) | RETRO_MEMORY_RTC) - -// Special game types passed into retro_load_game_special(). -// Only used when multiple ROMs are required. -#define RETRO_GAME_TYPE_BSX 0x101 -#define RETRO_GAME_TYPE_BSX_SLOTTED 0x102 -#define RETRO_GAME_TYPE_SUFAMI_TURBO 0x103 -#define RETRO_GAME_TYPE_SUPER_GAME_BOY 0x104 - -// Keysyms used for ID in input state callback when polling RETRO_KEYBOARD. -enum retro_key -{ - RETROK_UNKNOWN = 0, - RETROK_FIRST = 0, - RETROK_BACKSPACE = 8, - RETROK_TAB = 9, - RETROK_CLEAR = 12, - RETROK_RETURN = 13, - RETROK_PAUSE = 19, - RETROK_ESCAPE = 27, - RETROK_SPACE = 32, - RETROK_EXCLAIM = 33, - RETROK_QUOTEDBL = 34, - RETROK_HASH = 35, - RETROK_DOLLAR = 36, - RETROK_AMPERSAND = 38, - RETROK_QUOTE = 39, - RETROK_LEFTPAREN = 40, - RETROK_RIGHTPAREN = 41, - RETROK_ASTERISK = 42, - RETROK_PLUS = 43, - RETROK_COMMA = 44, - RETROK_MINUS = 45, - RETROK_PERIOD = 46, - RETROK_SLASH = 47, - RETROK_0 = 48, - RETROK_1 = 49, - RETROK_2 = 50, - RETROK_3 = 51, - RETROK_4 = 52, - RETROK_5 = 53, - RETROK_6 = 54, - RETROK_7 = 55, - RETROK_8 = 56, - RETROK_9 = 57, - RETROK_COLON = 58, - RETROK_SEMICOLON = 59, - RETROK_LESS = 60, - RETROK_EQUALS = 61, - RETROK_GREATER = 62, - RETROK_QUESTION = 63, - RETROK_AT = 64, - RETROK_LEFTBRACKET = 91, - RETROK_BACKSLASH = 92, - RETROK_RIGHTBRACKET = 93, - RETROK_CARET = 94, - RETROK_UNDERSCORE = 95, - RETROK_BACKQUOTE = 96, - RETROK_a = 97, - RETROK_b = 98, - RETROK_c = 99, - RETROK_d = 100, - RETROK_e = 101, - RETROK_f = 102, - RETROK_g = 103, - RETROK_h = 104, - RETROK_i = 105, - RETROK_j = 106, - RETROK_k = 
107, - RETROK_l = 108, - RETROK_m = 109, - RETROK_n = 110, - RETROK_o = 111, - RETROK_p = 112, - RETROK_q = 113, - RETROK_r = 114, - RETROK_s = 115, - RETROK_t = 116, - RETROK_u = 117, - RETROK_v = 118, - RETROK_w = 119, - RETROK_x = 120, - RETROK_y = 121, - RETROK_z = 122, - RETROK_DELETE = 127, - - RETROK_KP0 = 256, - RETROK_KP1 = 257, - RETROK_KP2 = 258, - RETROK_KP3 = 259, - RETROK_KP4 = 260, - RETROK_KP5 = 261, - RETROK_KP6 = 262, - RETROK_KP7 = 263, - RETROK_KP8 = 264, - RETROK_KP9 = 265, - RETROK_KP_PERIOD = 266, - RETROK_KP_DIVIDE = 267, - RETROK_KP_MULTIPLY = 268, - RETROK_KP_MINUS = 269, - RETROK_KP_PLUS = 270, - RETROK_KP_ENTER = 271, - RETROK_KP_EQUALS = 272, - - RETROK_UP = 273, - RETROK_DOWN = 274, - RETROK_RIGHT = 275, - RETROK_LEFT = 276, - RETROK_INSERT = 277, - RETROK_HOME = 278, - RETROK_END = 279, - RETROK_PAGEUP = 280, - RETROK_PAGEDOWN = 281, - - RETROK_F1 = 282, - RETROK_F2 = 283, - RETROK_F3 = 284, - RETROK_F4 = 285, - RETROK_F5 = 286, - RETROK_F6 = 287, - RETROK_F7 = 288, - RETROK_F8 = 289, - RETROK_F9 = 290, - RETROK_F10 = 291, - RETROK_F11 = 292, - RETROK_F12 = 293, - RETROK_F13 = 294, - RETROK_F14 = 295, - RETROK_F15 = 296, - - RETROK_NUMLOCK = 300, - RETROK_CAPSLOCK = 301, - RETROK_SCROLLOCK = 302, - RETROK_RSHIFT = 303, - RETROK_LSHIFT = 304, - RETROK_RCTRL = 305, - RETROK_LCTRL = 306, - RETROK_RALT = 307, - RETROK_LALT = 308, - RETROK_RMETA = 309, - RETROK_LMETA = 310, - RETROK_LSUPER = 311, - RETROK_RSUPER = 312, - RETROK_MODE = 313, - RETROK_COMPOSE = 314, - - RETROK_HELP = 315, - RETROK_PRINT = 316, - RETROK_SYSREQ = 317, - RETROK_BREAK = 318, - RETROK_MENU = 319, - RETROK_POWER = 320, - RETROK_EURO = 321, - RETROK_UNDO = 322, - - RETROK_LAST, - - RETROK_DUMMY = INT_MAX // Ensure sizeof(enum) == sizeof(int) -}; - -enum retro_mod -{ - RETROKMOD_NONE = 0x0000, - - RETROKMOD_SHIFT = 0x01, - RETROKMOD_CTRL = 0x02, - RETROKMOD_ALT = 0x04, - RETROKMOD_META = 0x08, - - RETROKMOD_NUMLOCK = 0x10, - RETROKMOD_CAPSLOCK = 0x20, - 
RETROKMOD_SCROLLOCK = 0x40, - - RETROKMOD_DUMMY = INT_MAX // Ensure sizeof(enum) == sizeof(int) -}; - -// If set, this call is not part of the public libretro API yet. It can change or be removed at any time. -#define RETRO_ENVIRONMENT_EXPERIMENTAL 0x10000 - -// Environment commands. -#define RETRO_ENVIRONMENT_SET_ROTATION 1 // const unsigned * -- - // Sets screen rotation of graphics. - // Is only implemented if rotation can be accelerated by hardware. - // Valid values are 0, 1, 2, 3, which rotates screen by 0, 90, 180, 270 degrees - // counter-clockwise respectively. - // -#define RETRO_ENVIRONMENT_GET_OVERSCAN 2 // bool * -- - // Boolean value whether or not the implementation should use overscan, or crop away overscan. - // -#define RETRO_ENVIRONMENT_GET_CAN_DUPE 3 // bool * -- - // Boolean value whether or not frontend supports frame duping, - // passing NULL to video frame callback. - // -// Environ 4, 5 are no longer supported (GET_VARIABLE / SET_VARIABLES), and reserved to avoid possible ABI clash. -#define RETRO_ENVIRONMENT_SET_MESSAGE 6 // const struct retro_message * -- - // Sets a message to be displayed in implementation-specific manner for a certain amount of 'frames'. - // Should not be used for trivial messages, which should simply be logged to stderr. -#define RETRO_ENVIRONMENT_SHUTDOWN 7 // N/A (NULL) -- - // Requests the frontend to shutdown. - // Should only be used if game has a specific - // way to shutdown the game from a menu item or similar. - // -#define RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL 8 - // const unsigned * -- - // Gives a hint to the frontend how demanding this implementation - // is on a system. E.g. reporting a level of 2 means - // this implementation should run decently on all frontends - // of level 2 and up. - // - // It can be used by the frontend to potentially warn - // about too demanding implementations. 
- // - // The levels are "floating", but roughly defined as: - // 0: Low-powered embedded devices such as Raspberry Pi - // 1: 6th generation consoles, such as Wii/Xbox 1, and phones, tablets, etc. - // 2: 7th generation consoles, such as PS3/360, with sub-par CPUs. - // 3: Modern desktop/laptops with reasonably powerful CPUs. - // 4: High-end desktops with very powerful CPUs. - // - // This function can be called on a per-game basis, - // as certain games an implementation can play might be - // particularily demanding. - // If called, it should be called in retro_load_game(). - // -#define RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY 9 - // const char ** -- - // Returns the "system" directory of the frontend. - // This directory can be used to store system specific ROMs such as BIOSes, configuration data, etc. - // The returned value can be NULL. - // If so, no such directory is defined, - // and it's up to the implementation to find a suitable directory. - // -#define RETRO_ENVIRONMENT_SET_PIXEL_FORMAT 10 - // const enum retro_pixel_format * -- - // Sets the internal pixel format used by the implementation. - // The default pixel format is RETRO_PIXEL_FORMAT_0RGB1555. - // This pixel format however, is deprecated (see enum retro_pixel_format). - // If the call returns false, the frontend does not support this pixel format. - // This function should be called inside retro_load_game() or retro_get_system_av_info(). - // -#define RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS 11 - // const struct retro_input_descriptor * -- - // Sets an array of retro_input_descriptors. - // It is up to the frontend to present this in a usable way. - // The array is terminated by retro_input_descriptor::description being set to NULL. - // This function can be called at any time, but it is recommended to call it as early as possible. 
-#define RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK 12 - // const struct retro_keyboard_callback * -- - // Sets a callback function used to notify core about keyboard events. - // -#define RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE 13 - // const struct retro_disk_control_callback * -- - // Sets an interface which frontend can use to eject and insert disk images. - // This is used for games which consist of multiple images and must be manually - // swapped out by the user (e.g. PSX). -#define RETRO_ENVIRONMENT_SET_HW_RENDER (14 | RETRO_ENVIRONMENT_EXPERIMENTAL) - // struct retro_hw_render_callback * -- - // NOTE: This call is currently very experimental, and should not be considered part of the public API. - // The interface could be changed or removed at any time. - // Sets an interface to let a libretro core render with hardware acceleration. - // Should be called in retro_load_game(). - // If successful, libretro cores will be able to render to a frontend-provided framebuffer. - // The size of this framebuffer will be at least as large as max_width/max_height provided in get_av_info(). - // If HW rendering is used, pass only RETRO_HW_FRAME_BUFFER_VALID or NULL to retro_video_refresh_t. -#define RETRO_ENVIRONMENT_GET_VARIABLE 15 - // struct retro_variable * -- - // Interface to aquire user-defined information from environment - // that cannot feasibly be supported in a multi-system way. - // 'key' should be set to a key which has already been set by SET_VARIABLES. - // 'data' will be set to a value or NULL. - // -#define RETRO_ENVIRONMENT_SET_VARIABLES 16 - // const struct retro_variable * -- - // Allows an implementation to signal the environment - // which variables it might want to check for later using GET_VARIABLE. - // This allows the frontend to present these variables to a user dynamically. - // This should be called as early as possible (ideally in retro_set_environment). 
- // - // 'data' points to an array of retro_variable structs terminated by a { NULL, NULL } element. - // retro_variable::key should be namespaced to not collide with other implementations' keys. E.g. A core called 'foo' should use keys named as 'foo_option'. - // retro_variable::value should contain a human readable description of the key as well as a '|' delimited list of expected values. - // The number of possible options should be very limited, i.e. it should be feasible to cycle through options without a keyboard. - // First entry should be treated as a default. - // - // Example entry: - // { "foo_option", "Speed hack coprocessor X; false|true" } - // - // Text before first ';' is description. This ';' must be followed by a space, and followed by a list of possible values split up with '|'. - // Only strings are operated on. The possible values will generally be displayed and stored as-is by the frontend. - // -#define RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE 17 - // bool * -- - // Result is set to true if some variables are updated by - // frontend since last call to RETRO_ENVIRONMENT_GET_VARIABLE. - // Variables should be queried with GET_VARIABLE. - // -#define RETRO_ENVIRONMENT_SET_SUPPORT_NO_GAME 18 - // const bool * -- - // If true, the libretro implementation supports calls to retro_load_game() with NULL as argument. - // Used by cores which can run without particular game data. - // This should be called within retro_set_environment() only. - - -// Pass this to retro_video_refresh_t if rendering to hardware. -// Passing NULL to retro_video_refresh_t is still a frame dupe as normal. -#define RETRO_HW_FRAME_BUFFER_VALID ((void*)-1) - -// Invalidates the current HW context. -// If called, all GPU resources must be reinitialized. -// Usually called when frontend reinits video driver. -// Also called first time video driver is initialized, allowing libretro core to init resources. 
-typedef void (*retro_hw_context_reset_t)(void); -// Gets current framebuffer which is to be rendered to. Could change every frame potentially. -typedef uintptr_t (*retro_hw_get_current_framebuffer_t)(void); - -// Get a symbol from HW context. -typedef void (*retro_proc_address_t)(void); -typedef retro_proc_address_t (*retro_hw_get_proc_address_t)(const char *sym); - -enum retro_hw_context_type -{ - RETRO_HW_CONTEXT_NONE = 0, - RETRO_HW_CONTEXT_OPENGL, // OpenGL 2.x. Latest version available before 3.x+. - RETRO_HW_CONTEXT_OPENGLES2, // GLES 2.0 - - RETRO_HW_CONTEXT_DUMMY = INT_MAX -}; - -struct retro_hw_render_callback -{ - enum retro_hw_context_type context_type; // Which API to use. Set by libretro core. - retro_hw_context_reset_t context_reset; // Set by libretro core. - retro_hw_get_current_framebuffer_t get_current_framebuffer; // Set by frontend. - retro_hw_get_proc_address_t get_proc_address; // Set by frontend. - bool depth; // Set if render buffers should have depth component attached. -}; - -// Callback type passed in RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK. Called by the frontend in response to keyboard events. -// down is set if the key is being pressed, or false if it is being released. -// keycode is the RETROK value of the char. -// character is the text character of the pressed key. (UTF-32). -// key_modifiers is a set of RETROKMOD values or'ed together. -typedef void (*retro_keyboard_event_t)(bool down, unsigned keycode, uint32_t character, uint16_t key_modifiers); - -struct retro_keyboard_callback -{ - retro_keyboard_event_t callback; -}; - -// Callbacks for RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE. -// Should be set for implementations which can swap out multiple disk images in runtime. -// If the implementation can do this automatically, it should strive to do so. -// However, there are cases where the user must manually do so. -// -// Overview: To swap a disk image, eject the disk image with set_eject_state(true). 
-// Set the disk index with set_image_index(index). Insert the disk again with set_eject_state(false). - -// If ejected is true, "ejects" the virtual disk tray. -// When ejected, the disk image index can be set. -typedef bool (*retro_set_eject_state_t)(bool ejected); -// Gets current eject state. The initial state is 'not ejected'. -typedef bool (*retro_get_eject_state_t)(void); -// Gets current disk index. First disk is index 0. -// If return value is >= get_num_images(), no disk is currently inserted. -typedef unsigned (*retro_get_image_index_t)(void); -// Sets image index. Can only be called when disk is ejected. -// The implementation supports setting "no disk" by using an index >= get_num_images(). -typedef bool (*retro_set_image_index_t)(unsigned index); -// Gets total number of images which are available to use. -typedef unsigned (*retro_get_num_images_t)(void); -// -// Replaces the disk image associated with index. -// Arguments to pass in info have same requirements as retro_load_game(). -// Virtual disk tray must be ejected when calling this. -// Replacing a disk image with info = NULL will remove the disk image from the internal list. -// As a result, calls to get_image_index() can change. -// -// E.g. replace_image_index(1, NULL), and previous get_image_index() returned 4 before. -// Index 1 will be removed, and the new index is 3. -struct retro_game_info; -typedef bool (*retro_replace_image_index_t)(unsigned index, const struct retro_game_info *info); -// Adds a new valid index (get_num_images()) to the internal disk list. -// This will increment subsequent return values from get_num_images() by 1. -// This image index cannot be used until a disk image has been set with replace_image_index. 
-typedef bool (*retro_add_image_index_t)(void); - -struct retro_disk_control_callback -{ - retro_set_eject_state_t set_eject_state; - retro_get_eject_state_t get_eject_state; - - retro_get_image_index_t get_image_index; - retro_set_image_index_t set_image_index; - retro_get_num_images_t get_num_images; - - retro_replace_image_index_t replace_image_index; - retro_add_image_index_t add_image_index; -}; - -enum retro_pixel_format -{ - // 0RGB1555, native endian. 0 bit must be set to 0. - // This pixel format is default for compatibility concerns only. - // If a 15/16-bit pixel format is desired, consider using RGB565. - RETRO_PIXEL_FORMAT_0RGB1555 = 0, - - // XRGB8888, native endian. X bits are ignored. - RETRO_PIXEL_FORMAT_XRGB8888 = 1, - - // RGB565, native endian. This pixel format is the recommended format to use if a 15/16-bit format is desired - // as it is the pixel format that is typically available on a wide range of low-power devices. - // It is also natively supported in APIs like OpenGL ES. - RETRO_PIXEL_FORMAT_RGB565 = 2, - - // Ensure sizeof() == sizeof(int). - RETRO_PIXEL_FORMAT_UNKNOWN = INT_MAX -}; - -struct retro_message -{ - const char *msg; // Message to be displayed. - unsigned frames; // Duration in frames of message. -}; - -// Describes how the libretro implementation maps a libretro input bind -// to its internal input system through a human readable string. -// This string can be used to better let a user configure input. -struct retro_input_descriptor -{ - // Associates given parameters with a description. - unsigned port; - unsigned device; - unsigned index; - unsigned id; - - const char *description; // Human readable description for parameters. - // The pointer must remain valid until retro_unload_game() is called. -}; - -struct retro_system_info -{ - // All pointers are owned by libretro implementation, and pointers must remain valid until retro_deinit() is called. - - const char *library_name; // Descriptive name of library. 
Should not contain any version numbers, etc. - const char *library_version; // Descriptive version of core. - - const char *valid_extensions; // A string listing probably rom extensions the core will be able to load, separated with pipe. - // I.e. "bin|rom|iso". - // Typically used for a GUI to filter out extensions. - - bool need_fullpath; // If true, retro_load_game() is guaranteed to provide a valid pathname in retro_game_info::path. - // ::data and ::size are both invalid. - // If false, ::data and ::size are guaranteed to be valid, but ::path might not be valid. - // This is typically set to true for libretro implementations that must load from file. - // Implementations should strive for setting this to false, as it allows the frontend to perform patching, etc. - - bool block_extract; // If true, the frontend is not allowed to extract any archives before loading the real ROM. - // Necessary for certain libretro implementations that load games from zipped archives. -}; - -struct retro_game_geometry -{ - unsigned base_width; // Nominal video width of game. - unsigned base_height; // Nominal video height of game. - unsigned max_width; // Maximum possible width of game. - unsigned max_height; // Maximum possible height of game. - - float aspect_ratio; // Nominal aspect ratio of game. If aspect_ratio is <= 0.0, - // an aspect ratio of base_width / base_height is assumed. - // A frontend could override this setting if desired. -}; - -struct retro_system_timing -{ - double fps; // FPS of video content. - double sample_rate; // Sampling rate of audio. -}; - -struct retro_system_av_info -{ - struct retro_game_geometry geometry; - struct retro_system_timing timing; -}; - -struct retro_variable -{ - const char *key; // Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE. - // If NULL, obtains the complete environment string if more complex parsing is necessary. 
- // The environment string is formatted as key-value pairs delimited by semicolons as so: - // "key1=value1;key2=value2;..." - const char *value; // Value to be obtained. If key does not exist, it is set to NULL. -}; - -struct retro_game_info -{ - const char *path; // Path to game, UTF-8 encoded. Usually used as a reference. - // May be NULL if rom was loaded from stdin or similar. - // retro_system_info::need_fullpath guaranteed that this path is valid. - const void *data; // Memory buffer of loaded game. Will be NULL if need_fullpath was set. - size_t size; // Size of memory buffer. - const char *meta; // String of implementation specific meta-data. -}; - -// Callbacks -// -// Environment callback. Gives implementations a way of performing uncommon tasks. Extensible. -typedef bool (*retro_environment_t)(unsigned cmd, void *data); - -// Render a frame. Pixel format is 15-bit 0RGB1555 native endian unless changed (see RETRO_ENVIRONMENT_SET_PIXEL_FORMAT). -// Width and height specify dimensions of buffer. -// Pitch specifices length in bytes between two lines in buffer. -// For performance reasons, it is highly recommended to have a frame that is packed in memory, i.e. pitch == width * byte_per_pixel. -// Certain graphic APIs, such as OpenGL ES, do not like textures that are not packed in memory. -typedef void (*retro_video_refresh_t)(const void *data, unsigned width, unsigned height, size_t pitch); - -// Renders a single audio frame. Should only be used if implementation generates a single sample at a time. -// Format is signed 16-bit native endian. -typedef void (*retro_audio_sample_t)(int16_t left, int16_t right); -// Renders multiple audio frames in one go. One frame is defined as a sample of left and right channels, interleaved. -// I.e. int16_t buf[4] = { l, r, l, r }; would be 2 frames. -// Only one of the audio callbacks must ever be used. -typedef size_t (*retro_audio_sample_batch_t)(const int16_t *data, size_t frames); - -// Polls input. 
-typedef void (*retro_input_poll_t)(void); -// Queries for input for player 'port'. device will be masked with RETRO_DEVICE_MASK. -// Specialization of devices such as RETRO_DEVICE_JOYPAD_MULTITAP that have been set with retro_set_controller_port_device() -// will still use the higher level RETRO_DEVICE_JOYPAD to request input. -typedef int16_t (*retro_input_state_t)(unsigned port, unsigned device, unsigned index, unsigned id); - -// Sets callbacks. retro_set_environment() is guaranteed to be called before retro_init(). -// The rest of the set_* functions are guaranteed to have been called before the first call to retro_run() is made. -void retro_set_environment(retro_environment_t); -void retro_set_video_refresh(retro_video_refresh_t); -void retro_set_audio_sample(retro_audio_sample_t); -void retro_set_audio_sample_batch(retro_audio_sample_batch_t); -void retro_set_input_poll(retro_input_poll_t); -void retro_set_input_state(retro_input_state_t); - -// Library global initialization/deinitialization. -void retro_init(void); -void retro_deinit(void); - -// Must return RETRO_API_VERSION. Used to validate ABI compatibility when the API is revised. -unsigned retro_api_version(void); - -// Gets statically known system info. Pointers provided in *info must be statically allocated. -// Can be called at any time, even before retro_init(). -void retro_get_system_info(struct retro_system_info *info); - -// Gets information about system audio/video timings and geometry. -// Can be called only after retro_load_game() has successfully completed. -// NOTE: The implementation of this function might not initialize every variable if needed. -// E.g. geom.aspect_ratio might not be initialized if core doesn't desire a particular aspect ratio. -void retro_get_system_av_info(struct retro_system_av_info *info); - -// Sets device to be used for player 'port'. -void retro_set_controller_port_device(unsigned port, unsigned device); - -// Resets the current game. 
-void retro_reset(void); - -// Runs the game for one video frame. -// During retro_run(), input_poll callback must be called at least once. -// -// If a frame is not rendered for reasons where a game "dropped" a frame, -// this still counts as a frame, and retro_run() should explicitly dupe a frame if GET_CAN_DUPE returns true. -// In this case, the video callback can take a NULL argument for data. -void retro_run(void); - -// Returns the amount of data the implementation requires to serialize internal state (save states). -// Beetween calls to retro_load_game() and retro_unload_game(), the returned size is never allowed to be larger than a previous returned value, to -// ensure that the frontend can allocate a save state buffer once. -size_t retro_serialize_size(void); - -// Serializes internal state. If failed, or size is lower than retro_serialize_size(), it should return false, true otherwise. -bool retro_serialize(void *data, size_t size); -bool retro_unserialize(const void *data, size_t size); - -void retro_cheat_reset(void); -void retro_cheat_set(unsigned index, bool enabled, const char *code); - -// Loads a game. -bool retro_load_game(const struct retro_game_info *game); - -// Loads a "special" kind of game. Should not be used except in extreme cases. -bool retro_load_game_special( - unsigned game_type, - const struct retro_game_info *info, size_t num_info -); - -// Unloads a currently loaded game. -void retro_unload_game(void); - -// Gets region of game. -unsigned retro_get_region(void); - -// Gets region of memory. 
-void *retro_get_memory_data(unsigned id); -size_t retro_get_memory_size(unsigned id); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/platform/libretro/3ds/3ds_utils.c b/platform/libretro/3ds/3ds_utils.c new file mode 100644 index 00000000..11f55204 --- /dev/null +++ b/platform/libretro/3ds/3ds_utils.c @@ -0,0 +1,98 @@ + +#include "3ds_utils.h" +#include <stdint.h> + +#define GET_VERSION_MAJOR(version) ((version) >>24) + +typedef int (*ctr_callback_type)(void); + +int srvGetServiceHandle(unsigned int* out, const char* name); +int svcCloseHandle(unsigned int handle); +int svcBackdoor(ctr_callback_type); +int32_t svcGetSystemInfo(int64_t* out, uint32_t type, int32_t param); +void ctr_clear_cache(void); + +/* Set by check_rosalina(); selects the cache-flush path in ctr_flush_invalidate_cache(). */ +static int has_rosalina; + +/* Query svcGetSystemInfo(type 0x10000, param 0): has_rosalina becomes 1 only when + * the call returns 0 and the reported major version is at least 8, else it stays 0. */ +void check_rosalina(void) { + int64_t version; + uint32_t major; + + has_rosalina = 0; + + if (!svcGetSystemInfo(&version, 0x10000, 0)) { + major = GET_VERSION_MAJOR(version); + + if (major >= 8) + has_rosalina = 1; + } +} + +/* Runs via svcBackdoor: masks interrupts, then overwrites the four svc_access_control words. + * NOTE(review): CPSR is not restored here, unlike ctr_clean_invalidate_kernel; confirm this is intended. */ +static void ctr_enable_all_svc_kernel(void) +{ + __asm__ volatile("cpsid aif"); + + unsigned int* svc_access_control = *(*(unsigned int***)0xFFFF9000 + 0x22) - 0x6; + + svc_access_control[0]=0xFFFFFFFE; + svc_access_control[1]=0xFFFFFFFF; + svc_access_control[2]=0xFFFFFFFF; + svc_access_control[3]=0x3FFFFFFF; +} + +/* Execute ctr_enable_all_svc_kernel through svcBackdoor. */ +static void ctr_enable_all_svc(void) +{ + svcBackdoor((ctr_callback_type)ctr_enable_all_svc_kernel); +} + +/* Runs via svcBackdoor: cleans the data cache and invalidates the instruction cache with interrupts masked, then restores CPSR. */ +static void ctr_clean_invalidate_kernel(void) +{ + __asm__ volatile( + "mrs r1, cpsr\n" + "cpsid aif\n" // disable interrupts + "mov r0, #0\n" + "mcr p15, 0, r0, c7, c10, 0\n" // clean dcache + "mcr p15, 0, r0, c7, c10, 4\n" // DSB + "mcr p15, 0, r0, c7, c5, 0\n" // invalidate icache+BTAC + "msr cpsr_cx, r1\n" // restore interrupts + ::: "r0", "r1"); +} + +/* Flush the caches through ctr_clear_cache() when has_rosalina is set, otherwise by + * running ctr_clean_invalidate_kernel through svcBackdoor. */ +void ctr_flush_invalidate_cache(void) +{ + if (has_rosalina) { + ctr_clear_cache(); + } else { + // __asm__ volatile("svc 0x2E\n\t"); + // __asm__ volatile("svc 0x4B\n\t"); + svcBackdoor((ctr_callback_type)ctr_clean_invalidate_kernel); + } +} + +/* Returns 1 when __ctr_svchax is already non-zero or after calling ctr_enable_all_svc(); + * returns 0, without enabling anything, when __service_ptr is non-zero. */ +int
ctr_svchack_init(void) +{ + extern unsigned int __ctr_svchax; + extern unsigned int __service_ptr; + + if(__ctr_svchax) + return 1; /* All services have already been enabled */ + + if(__service_ptr) + return 0; + + /* CFW */ + ctr_enable_all_svc(); + return 1; +} + diff --git a/platform/libretro/3ds/3ds_utils.h b/platform/libretro/3ds/3ds_utils.h new file mode 100644 index 00000000..7835e2d3 --- /dev/null +++ b/platform/libretro/3ds/3ds_utils.h @@ -0,0 +1,15 @@ +#ifndef _3DS_UTILS_H +#define _3DS_UTILS_H + +void ctr_flush_invalidate_cache(void); + +int ctr_svchack_init(void); +void check_rosalina(void); + +#include <stdio.h> +/* Debug aid: prints function@file:line, flushes stdout, then calls wait_for_input(). */ +#define DEBUG_HOLD() do{printf("%s@%s:%d.\n",__FUNCTION__, __FILE__, __LINE__);fflush(stdout);wait_for_input();}while(0) + +void wait_for_input(void); + +#endif // _3DS_UTILS_H diff --git a/platform/libretro/3ds/utils.S b/platform/libretro/3ds/utils.S new file mode 100644 index 00000000..c8df651a --- /dev/null +++ b/platform/libretro/3ds/utils.S @@ -0,0 +1,25 @@ + .text + .arm + .balign 4 + + .func ctr_clear_cache_kernel +ctr_clear_cache_kernel: + cpsid aif + mov r0, #0 + mcr p15, 0, r0, c7, c10, 0 @ Clean entire data cache + mcr p15, 0, r0, c7, c10, 5 @ Data Memory Barrier + mcr p15, 0, r0, c7, c5, 0 @ Invalidate entire instruction cache / Flush BTB + mcr p15, 0, r0, c7, c10, 4 @ Data Sync Barrier + bx lr + .endfunc + + @@ Clear the entire data cache / invalidate the instruction cache. Uses + @@ Rosalina svcCustomBackdoor to avoid svcBackdoor stack corruption + @@ during interrupts.
+ .global ctr_clear_cache + .func ctr_clear_cache +ctr_clear_cache: + ldr r0, =ctr_clear_cache_kernel + svc 0x80 @ svcCustomBackdoor + bx lr + .endfunc diff --git a/platform/libretro/libretro-common/compat/compat_posix_string.c b/platform/libretro/libretro-common/compat/compat_posix_string.c new file mode 100644 index 00000000..6a2f07ee --- /dev/null +++ b/platform/libretro/libretro-common/compat/compat_posix_string.c @@ -0,0 +1,104 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (compat_posix_string.c). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include + +#include + +#ifdef _WIN32 + +#undef strcasecmp +#undef strdup +#undef isblank +#undef strtok_r +#include +#include +#include +#include + +#include + +int retro_strcasecmp__(const char *a, const char *b) +{ + while (*a && *b) + { + int a_ = tolower(*a); + int b_ = tolower(*b); + + if (a_ != b_) + return a_ - b_; + + a++; + b++; + } + + return tolower(*a) - tolower(*b); +} + +char *retro_strdup__(const char *orig) +{ + size_t len = strlen(orig) + 1; + char *ret = (char*)malloc(len); + if (!ret) + return NULL; + + strlcpy(ret, orig, len); + return ret; +} + +int retro_isblank__(int c) +{ + return (c == ' ') || (c == '\t'); +} + +char *retro_strtok_r__(char *str, const char *delim, char **saveptr) +{ + char *first = NULL; + if (!saveptr || !delim) + return NULL; + + if (str) + *saveptr = str; + + do + { + char *ptr = NULL; + first = *saveptr; + while (*first && strchr(delim, *first)) + *first++ = '\0'; + + if (*first == '\0') + return NULL; + + ptr = first + 1; + + while (*ptr && !strchr(delim, *ptr)) + ptr++; + + *saveptr = ptr + (*ptr ? 1 : 0); + *ptr = '\0'; + } while (strlen(first) == 0); + + return first; +} + +#endif diff --git a/platform/libretro/libretro-common/compat/compat_strcasestr.c b/platform/libretro/libretro-common/compat/compat_strcasestr.c new file mode 100644 index 00000000..4129dab2 --- /dev/null +++ b/platform/libretro/libretro-common/compat/compat_strcasestr.c @@ -0,0 +1,58 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (compat_strcasestr.c). 
+ * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include + +#include + +/* Pretty much strncasecmp. 
*/ +static int casencmp(const char *a, const char *b, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + { + int a_lower = tolower(a[i]); + int b_lower = tolower(b[i]); + if (a_lower != b_lower) + return a_lower - b_lower; + } + + return 0; +} + +char *strcasestr_retro__(const char *haystack, const char *needle) +{ + size_t i, search_off; + size_t hay_len = strlen(haystack); + size_t needle_len = strlen(needle); + + if (needle_len > hay_len) + return NULL; + + search_off = hay_len - needle_len; + for (i = 0; i <= search_off; i++) + if (!casencmp(haystack + i, needle, needle_len)) + return (char*)haystack + i; + + return NULL; +} diff --git a/platform/libretro/libretro-common/compat/compat_strl.c b/platform/libretro/libretro-common/compat/compat_strl.c new file mode 100644 index 00000000..31723107 --- /dev/null +++ b/platform/libretro/libretro-common/compat/compat_strl.c @@ -0,0 +1,69 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (compat_strl.c). + * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include + +#include + +/* Implementation of strlcpy()/strlcat() based on OpenBSD. */ + +#ifndef __MACH__ + +size_t strlcpy(char *dest, const char *source, size_t size) +{ + size_t src_size = 0; + size_t n = size; + + if (n) + while (--n && (*dest++ = *source++)) src_size++; + + if (!n) + { + if (size) *dest = '\0'; + while (*source++) src_size++; + } + + return src_size; +} + +size_t strlcat(char *dest, const char *source, size_t size) +{ + size_t len = strlen(dest); + + dest += len; + + if (len > size) + size = 0; + else + size -= len; + + return len + strlcpy(dest, source, size); +} +#endif + +char *strldup(const char *s, size_t n) +{ + char *dst = (char*)malloc(sizeof(char) * (n + 1)); + strlcpy(dst, s, n); + return dst; +} diff --git a/platform/libretro/libretro-common/compat/fopen_utf8.c b/platform/libretro/libretro-common/compat/fopen_utf8.c new file mode 100644 index 00000000..85abb59e --- /dev/null +++ b/platform/libretro/libretro-common/compat/fopen_utf8.c @@ -0,0 +1,63 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (fopen_utf8.c). 
+ * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#include + +#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x0500 || defined(_XBOX) +#ifndef LEGACY_WIN32 +#define LEGACY_WIN32 +#endif +#endif + +#ifdef _WIN32 +#undef fopen + +void *fopen_utf8(const char * filename, const char * mode) +{ +#if defined(LEGACY_WIN32) + FILE *ret = NULL; + char * filename_local = utf8_to_local_string_alloc(filename); + + if (!filename_local) + return NULL; + ret = fopen(filename_local, mode); + if (filename_local) + free(filename_local); + return ret; +#else + wchar_t * filename_w = utf8_to_utf16_string_alloc(filename); + wchar_t * mode_w = utf8_to_utf16_string_alloc(mode); + FILE* ret = NULL; + + if (filename_w && mode_w) + ret = _wfopen(filename_w, mode_w); + if (filename_w) + free(filename_w); + if (mode_w) + free(mode_w); + return ret; +#endif +} +#endif diff --git a/platform/libretro/libretro-common/encodings/encoding_utf.c b/platform/libretro/libretro-common/encodings/encoding_utf.c new file mode 100644 index 00000000..2760824d --- /dev/null +++ b/platform/libretro/libretro-common/encodings/encoding_utf.c @@ -0,0 +1,512 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (encoding_utf.c). 
+ * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include + +#if defined(_WIN32) && !defined(_XBOX) +#include +#elif defined(_XBOX) +#include +#endif + +#define UTF8_WALKBYTE(string) (*((*(string))++)) + +static unsigned leading_ones(uint8_t c) +{ + unsigned ones = 0; + while (c & 0x80) + { + ones++; + c <<= 1; + } + + return ones; +} + +/* Simple implementation. Assumes the sequence is + * properly synchronized and terminated. */ + +size_t utf8_conv_utf32(uint32_t *out, size_t out_chars, + const char *in, size_t in_size) +{ + unsigned i; + size_t ret = 0; + while (in_size && out_chars) + { + unsigned extra, shift; + uint32_t c; + uint8_t first = *in++; + unsigned ones = leading_ones(first); + + if (ones > 6 || ones == 1) /* Invalid or desync. */ + break; + + extra = ones ? 
ones - 1 : ones; + if (1 + extra > in_size) /* Overflow. */ + break; + + shift = (extra - 1) * 6; + c = (first & ((1 << (7 - ones)) - 1)) << (6 * extra); + + for (i = 0; i < extra; i++, in++, shift -= 6) + c |= (*in & 0x3f) << shift; + + *out++ = c; + in_size -= 1 + extra; + out_chars--; + ret++; + } + + return ret; +} + +bool utf16_conv_utf8(uint8_t *out, size_t *out_chars, + const uint16_t *in, size_t in_size) +{ + size_t out_pos = 0; + size_t in_pos = 0; + static const + uint8_t utf8_limits[5] = { 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + + for (;;) + { + unsigned num_adds; + uint32_t value; + + if (in_pos == in_size) + { + *out_chars = out_pos; + return true; + } + value = in[in_pos++]; + if (value < 0x80) + { + if (out) + out[out_pos] = (char)value; + out_pos++; + continue; + } + + if (value >= 0xD800 && value < 0xE000) + { + uint32_t c2; + + if (value >= 0xDC00 || in_pos == in_size) + break; + c2 = in[in_pos++]; + if (c2 < 0xDC00 || c2 >= 0xE000) + break; + value = (((value - 0xD800) << 10) | (c2 - 0xDC00)) + 0x10000; + } + + for (num_adds = 1; num_adds < 5; num_adds++) + if (value < (((uint32_t)1) << (num_adds * 5 + 6))) + break; + if (out) + out[out_pos] = (char)(utf8_limits[num_adds - 1] + + (value >> (6 * num_adds))); + out_pos++; + do + { + num_adds--; + if (out) + out[out_pos] = (char)(0x80 + + ((value >> (6 * num_adds)) & 0x3F)); + out_pos++; + }while (num_adds != 0); + } + + *out_chars = out_pos; + return false; +} + +/* Acts mostly like strlcpy. + * + * Copies the given number of UTF-8 characters, + * but at most d_len bytes. + * + * Always NULL terminates. + * Does not copy half a character. + * + * Returns number of bytes. 's' is assumed valid UTF-8. + * Use only if 'chars' is considerably less than 'd_len'. 
*/ +size_t utf8cpy(char *d, size_t d_len, const char *s, size_t chars) +{ + const uint8_t *sb = (const uint8_t*)s; + const uint8_t *sb_org = sb; + + if (!s) + return 0; + + while (*sb && chars-- > 0) + { + sb++; + while ((*sb & 0xC0) == 0x80) + sb++; + } + + if ((size_t)(sb - sb_org) > d_len-1 /* NUL */) + { + sb = sb_org + d_len-1; + while ((*sb & 0xC0) == 0x80) + sb--; + } + + memcpy(d, sb_org, sb-sb_org); + d[sb-sb_org] = '\0'; + + return sb-sb_org; +} + +const char *utf8skip(const char *str, size_t chars) +{ + const uint8_t *strb = (const uint8_t*)str; + + if (!chars) + return str; + + do + { + strb++; + while ((*strb & 0xC0)==0x80) + strb++; + chars--; + }while (chars); + + return (const char*)strb; +} + +size_t utf8len(const char *string) +{ + size_t ret = 0; + + if (!string) + return 0; + + while (*string) + { + if ((*string & 0xC0) != 0x80) + ret++; + string++; + } + return ret; +} + +/* Does not validate the input, returns garbage if it's not UTF-8. */ +uint32_t utf8_walk(const char **string) +{ + uint8_t first = UTF8_WALKBYTE(string); + uint32_t ret = 0; + + if (first < 128) + return first; + + ret = (ret << 6) | (UTF8_WALKBYTE(string) & 0x3F); + if (first >= 0xE0) + { + ret = (ret << 6) | (UTF8_WALKBYTE(string) & 0x3F); + if (first >= 0xF0) + { + ret = (ret << 6) | (UTF8_WALKBYTE(string) & 0x3F); + return ret | (first & 7) << 18; + } + return ret | (first & 15) << 12; + } + + return ret | (first & 31) << 6; +} + +static bool utf16_to_char(uint8_t **utf_data, + size_t *dest_len, const uint16_t *in) +{ + unsigned len = 0; + + while (in[len] != '\0') + len++; + + utf16_conv_utf8(NULL, dest_len, in, len); + *dest_len += 1; + *utf_data = (uint8_t*)malloc(*dest_len); + if (*utf_data == 0) + return false; + + return utf16_conv_utf8(*utf_data, dest_len, in, len); +} + +bool utf16_to_char_string(const uint16_t *in, char *s, size_t len) +{ + size_t dest_len = 0; + uint8_t *utf16_data = NULL; + bool ret = utf16_to_char(&utf16_data, &dest_len, in); + + if (ret) + { 
+ utf16_data[dest_len] = 0; + strlcpy(s, (const char*)utf16_data, len); + } + + free(utf16_data); + utf16_data = NULL; + + return ret; +} + +#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE) +/* Returned pointer MUST be freed by the caller if non-NULL. */ +static char *mb_to_mb_string_alloc(const char *str, + enum CodePage cp_in, enum CodePage cp_out) +{ + wchar_t *path_buf_wide = NULL; + int path_buf_wide_len = MultiByteToWideChar(cp_in, 0, str, -1, NULL, 0); + + /* Windows 95 will return 0 from these functions with + * a UTF8 codepage set without MSLU. + * + * From an unknown MSDN version (others omit this info): + * - CP_UTF8 Windows 98/Me, Windows NT 4.0 and later: + * Translate using UTF-8. When this is set, dwFlags must be zero. + * - Windows 95: Under the Microsoft Layer for Unicode, + * MultiByteToWideChar also supports CP_UTF7 and CP_UTF8. + */ + + if (!path_buf_wide_len) + return strdup(str); + + path_buf_wide = (wchar_t*) + calloc(path_buf_wide_len + sizeof(wchar_t), sizeof(wchar_t)); + + if (path_buf_wide) + { + MultiByteToWideChar(cp_in, 0, + str, -1, path_buf_wide, path_buf_wide_len); + + if (*path_buf_wide) + { + int path_buf_len = WideCharToMultiByte(cp_out, 0, + path_buf_wide, -1, NULL, 0, NULL, NULL); + + if (path_buf_len) + { + char *path_buf = (char*) + calloc(path_buf_len + sizeof(char), sizeof(char)); + + if (path_buf) + { + WideCharToMultiByte(cp_out, 0, + path_buf_wide, -1, path_buf, + path_buf_len, NULL, NULL); + + free(path_buf_wide); + + if (*path_buf) + return path_buf; + + free(path_buf); + return NULL; + } + } + else + { + free(path_buf_wide); + return strdup(str); + } + } + + free(path_buf_wide); + } + + return NULL; +} +#endif + +/* Returned pointer MUST be freed by the caller if non-NULL. 
*/ +char* utf8_to_local_string_alloc(const char *str) +{ + if (str && *str) + { +#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE) + return mb_to_mb_string_alloc(str, CODEPAGE_UTF8, CODEPAGE_LOCAL); +#else + /* assume string needs no modification if not on Windows */ + return strdup(str); +#endif + } + return NULL; +} + +/* Returned pointer MUST be freed by the caller if non-NULL. */ +char* local_to_utf8_string_alloc(const char *str) +{ + if (str && *str) + { +#if defined(_WIN32) && !defined(_XBOX) && !defined(UNICODE) + return mb_to_mb_string_alloc(str, CODEPAGE_LOCAL, CODEPAGE_UTF8); +#else + /* assume string needs no modification if not on Windows */ + return strdup(str); +#endif + } + return NULL; +} + +/* Returned pointer MUST be freed by the caller if non-NULL. */ +wchar_t* utf8_to_utf16_string_alloc(const char *str) +{ +#ifdef _WIN32 + int len = 0; + int out_len = 0; +#else + size_t len = 0; + size_t out_len = 0; +#endif + wchar_t *buf = NULL; + + if (!str || !*str) + return NULL; + +#ifdef _WIN32 + len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0); + + if (len) + { + buf = (wchar_t*)calloc(len, sizeof(wchar_t)); + + if (!buf) + return NULL; + + out_len = MultiByteToWideChar(CP_UTF8, 0, str, -1, buf, len); + } + else + { + /* fallback to ANSI codepage instead */ + len = MultiByteToWideChar(CP_ACP, 0, str, -1, NULL, 0); + + if (len) + { + buf = (wchar_t*)calloc(len, sizeof(wchar_t)); + + if (!buf) + return NULL; + + out_len = MultiByteToWideChar(CP_ACP, 0, str, -1, buf, len); + } + } + + if (out_len < 0) + { + free(buf); + return NULL; + } +#else + /* NOTE: For now, assume non-Windows platforms' locale is already UTF-8. 
*/ + len = mbstowcs(NULL, str, 0) + 1; + + if (len) + { + buf = (wchar_t*)calloc(len, sizeof(wchar_t)); + + if (!buf) + return NULL; + + out_len = mbstowcs(buf, str, len); + } + + if (out_len == (size_t)-1) + { + free(buf); + return NULL; + } +#endif + + return buf; +} + +/* Returned pointer MUST be freed by the caller if non-NULL. */ +char* utf16_to_utf8_string_alloc(const wchar_t *str) +{ +#ifdef _WIN32 + int len = 0; +#else + size_t len = 0; +#endif + char *buf = NULL; + + if (!str || !*str) + return NULL; + +#ifdef _WIN32 + { + UINT code_page = CP_UTF8; + len = WideCharToMultiByte(code_page, + 0, str, -1, NULL, 0, NULL, NULL); + + /* fallback to ANSI codepage instead */ + if (!len) + { + code_page = CP_ACP; + len = WideCharToMultiByte(code_page, + 0, str, -1, NULL, 0, NULL, NULL); + } + + buf = (char*)calloc(len, sizeof(char)); + + if (!buf) + return NULL; + + if (WideCharToMultiByte(code_page, + 0, str, -1, buf, len, NULL, NULL) < 0) + { + free(buf); + return NULL; + } + } +#else + /* NOTE: For now, assume non-Windows platforms' + * locale is already UTF-8. */ + len = wcstombs(NULL, str, 0) + 1; + + if (len) + { + buf = (char*)calloc(len, sizeof(char)); + + if (!buf) + return NULL; + + if (wcstombs(buf, str, len) == (size_t)-1) + { + free(buf); + return NULL; + } + } +#endif + + return buf; +} diff --git a/platform/libretro/libretro-common/file/file_path.c b/platform/libretro/libretro-common/file/file_path.c new file mode 100644 index 00000000..c696ff04 --- /dev/null +++ b/platform/libretro/libretro-common/file/file_path.c @@ -0,0 +1,1447 @@ +/* Copyright (C) 2010-2020 The RetroArch team + * + * --------------------------------------------------------------------------------------- + * The following license statement only applies to this file (file_path.c). 
+ * --------------------------------------------------------------------------------------- + * + * Permission is hereby granted, free of charge, + * to any person obtaining a copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include